PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-cuda.c File Reference

This file has the source code for a component that enables PAPI-C to access hardware monitoring counters for GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Macros

#define CUDAAPI   __attribute__((weak))
 
#define CUDARTAPI   __attribute__((weak))
 
#define CUPTIAPI   __attribute__((weak))
 

Functions

static int enumEventDomains (CUdevice dev, int deviceId)
 
 if (eventId==NULL)
 
 memset (eventId, 0, size)
 
 if (device[deviceId].domain[domainId].event==NULL)
 
 CHECK_CUPTI_ERROR (err,"cuptiEventDomainEnumEvents")
 
 for (id=0;id< device[deviceId].domain[domainId].eventCount;id++)
 
 free (eventId)
 
static int createNativeEvents (void)
 
static int getEventValue (long long *counts, CUpti_EventGroup eventGroup, AddedEvents_t addedEvents)
 
int CUDA_init_thread (hwd_context_t *ctx)
 
int CUDA_init_component (int cidx)
 
static int linkCudaLibraries ()
 
int CUDA_init_control_state (hwd_control_state_t *ctrl)
 
int CUDA_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int CUDA_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int CUDA_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long_long **events, int flags)
 
int CUDA_shutdown_thread (hwd_context_t *ctx)
 
int CUDA_shutdown_component (void)
 
int CUDA_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
int CUDA_update_control_state (hwd_control_state_t *ptr, NativeInfo_t *native, int count, hwd_context_t *ctx)
 
int CUDA_set_domain (hwd_control_state_t *cntrl, int domain)
 
int CUDA_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int CUDA_cleanup_eventset (hwd_control_state_t *ctrl)
 
int CUDA_ntv_enum_events (unsigned int *EventCode, int modifier)
 
int CUDA_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
int CUDA_ntv_code_to_descr (unsigned int EventCode, char *name, int len)
 
int CUDA_ntv_code_to_bits (unsigned int EventCode, hwd_register_t *bits)
 

Variables

void(* _dl_non_dynamic_init )(void)
 
static int
 
CUpti_EventID * eventId = NULL
 
size_t size = 0
 
uint32_t id = 0
 
device[deviceId] domain[domainId] event
 
 err
 
 totalEventCount = device[deviceId].domain[domainId].eventCount
 
 return
 
papi_vector_t _cuda_vector
 

Detailed Description

Author
Heike Jagode (in collaboration with Robert Dietrich, TU Dresden) jagode@eecs.utk.edu

Definition in file linux-cuda.c.

Macro Definition Documentation

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))

Function Documentation

CHECK_CUPTI_ERROR ( err  ,
"cuptiEventDomainEnumEvents"   
)

Here is the caller graph for this function:

static int createNativeEvents ( void  )
static

Definition at line 380 of file linux-cuda.c.

381 {
382  int deviceId, id = 0;
383  uint32_t domainId, eventId;
384  int cuptiDomainId;
385  int i;
386  int devNameLen;
387 
388  /* create events for every GPU device and every domain per device */
389  for ( deviceId = 0; deviceId < deviceCount; deviceId++ ) {
390  /* for the event names, replace blanks in the device name with underscores */
391  devNameLen = strlen( device[deviceId].name );
392  for ( i = 0; i < devNameLen; i++ )
393  if ( device[deviceId].name[i] == ' ' )
394  device[deviceId].name[i] = '_';
395 
396  for ( domainId = 0; domainId < device[deviceId].domainCount;
397  domainId++ ) {
398  cuptiDomainId = device[deviceId].domain[domainId].domainId;
399 
400  for ( eventId = 0;
401  eventId < device[deviceId].domain[domainId].eventCount;
402  eventId++ ) {
403  unsigned int evtNameLen = strlen(device[deviceId].name) + strlen(device[deviceId].domain[domainId].name) +
404  strlen(device[deviceId].domain[domainId].event[eventId].name);
405  if (evtNameLen + 4 > sizeof(cuda_native_table[id].name)) {
406  SUBDBG("Event name too long to fit in cuda_native_table.name, event omitted: available space: %lu, space needed: %d\n",
407  sizeof(cuda_native_table[id].name), evtNameLen+4);
408  SUBDBG("device: %s, domain: %s, event: %s\n", device[deviceId].name, device[deviceId].domain[domainId].name,
409  device[deviceId].domain[domainId].event[eventId].name);
410  continue;
411  }
412  /* Save native event data */
413  sprintf( cuda_native_table[id].name,
414  "%s:%s:%s",
415  device[deviceId].name,
416  device[deviceId].domain[domainId].name,
417  device[deviceId].domain[domainId].event[eventId].name );
418 
419  strncpy( cuda_native_table[id].description,
420  device[deviceId].domain[domainId].event[eventId].desc,
421  PAPI_2MAX_STR_LEN-1 );
423 
424  /* The selector has to be !=0 . Starts with 1 */
426 
427  /* store event ID */
429  device[deviceId].domain[domainId].event[eventId].eventId;
430 
431  /* increment the table index counter */
432  id++;
433  }
434  }
435  }
436 
437  /* Return the number of events created */
438  return id;
439 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
char description[PAPI_2MAX_STR_LEN]
Definition: linux-cuda.h:97
static int deviceCount
Definition: linux-cuda.h:136
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
CUpti_EventID eventId
Definition: linux-cuda.h:51
CUpti_EventID eventId
Definition: linux-cuda.h:88
#define PAPI_2MAX_STR_LEN
Definition: papi.h:464
int i
Definition: fileop.c:140
uint32_t eventCount
Definition: linux-cuda.h:61
char description[PAPI_MAX_STR_LEN]
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
EventData_t * event
Definition: linux-cuda.h:62
uint32_t domainCount
Definition: linux-cuda.h:70
uint32_t id
Definition: linux-cuda.c:293
DomainData_t * domain
Definition: linux-cuda.h:71
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
char name[PAPI_MIN_STR_LEN]
Definition: linux-cuda.h:69
CUpti_EventDomainID domainId
Definition: linux-cuda.h:59
CUpti_EventID * eventId
Definition: linux-cuda.c:291
CUDA_register_t resources
Definition: linux-cuda.h:95
unsigned int selector
Definition: linux-cuda.h:86

Here is the call graph for this function:

Here is the caller graph for this function:

int CUDA_cleanup_eventset ( hwd_control_state_t *  ctrl)

Definition at line 1059 of file linux-cuda.c.

1060 {
1061  ( void ) ctrl;
1062 
1063  // TODO: after cleanup_eventset() which destroys the eventset, update_control_state()
1064  // is called, which operates on the already destroyed eventset. Bad!
1065 #if 0
1066  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
1067  CUptiResult cuptiErr = CUPTI_SUCCESS;
1068 
1069  /* Disable the CUDA eventGroup;
1070  it also frees the perfmon hardware on the GPU */
1071  cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ctrl->eventGroup );
1072  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );
1073 
1074  /* Call the CuPTI cleaning function before leaving */
1075  cuptiErr = (*cuptiEventGroupDestroyPtr)( CUDA_ctrl->eventGroup );
1076  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" );
1077 #endif
1078  return ( PAPI_OK );
1079 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_ctl ( hwd_context_t *  ctx,
int  code,
_papi_int_option_t *  option 
)

Definition at line 939 of file linux-cuda.c.

940 {
941  ( void ) ctx;
942  ( void ) code;
943  ( void ) option;
944  return ( PAPI_OK );
945 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
int CUDA_init_component ( int  cidx)

Definition at line 522 of file linux-cuda.c.

523 {
524  SUBDBG ("Entry: cidx: %d\n", cidx);
525  CUresult cuErr = CUDA_SUCCESS;
526 
527  /* link in all the cuda libraries and resolve the symbols we need to use */
528  if (linkCudaLibraries() != PAPI_OK) {
529  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
530  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
531  return (PAPI_ENOSUPP);
532  }
533 
534  /* Create dynamic event table */
535  NUM_EVENTS = detectDevice( );
536  if (NUM_EVENTS < 0) {
537  strncpy(_cuda_vector.cmp_info.disabled_reason, "Call to detectDevice failed.",PAPI_MAX_STR_LEN);
538  return (PAPI_ENOSUPP);
539  }
540  /* TODO: works only for one device right now;
541  need to find out if user can use 2 or more devices at same time */
542 
543  /* we want to create a CUDA context for either the default device or
544  the device specified with cudaSetDevice() in user code */
545  if ( CUDA_SUCCESS != (*cudaGetDevicePtr)( &currentDeviceID ) ) {
546  strncpy(_cuda_vector.cmp_info.disabled_reason, "No NVIDIA GPU's found.",PAPI_MAX_STR_LEN);
547  return ( PAPI_ENOSUPP );
548  }
549 
550  if ( getenv( "PAPI_VERBOSE" ) ) {
551  printf( "DEVICE USED: %s (%d)\n", device[currentDeviceID].name,
552  currentDeviceID );
553  }
554 
555  /* get the CUDA context from the calling CPU thread */
556  cuErr = (*cuCtxGetCurrentPtr)( &cuCtx );
557 
558  /* if no CUDA context is bound to the calling CPU thread yet, create one */
559  if ( cuErr != CUDA_SUCCESS || cuCtx == NULL ) {
560  cuErr = (*cuCtxCreatePtr)( &cuCtx, 0, device[currentDeviceID].dev );
561  CHECK_CU_ERROR( cuErr, "cuCtxCreate" );
562  }
563 
564  /* cuCtxGetCurrent() can return a non-null context that is not valid
565  because the context has not yet been initialized.
566  Here is a workaround:
567  cudaFree(NULL) forces the context to be initialized
568  if cudaFree(NULL) returns success then we are able to use the context in subsequent calls
569  if cudaFree(NULL) returns an error (or subsequent cupti* calls) then the context is not usable,
570  and will never be useable */
571  if ( CUDA_SUCCESS != (*cudaFreePtr)( NULL ) ) {
572  strncpy(_cuda_vector.cmp_info.disabled_reason, "Problem initializing CUDA context.",PAPI_MAX_STR_LEN);
573  return ( PAPI_ENOSUPP );
574  }
575 
576  /* Create dynamic event table */
578  malloc( sizeof ( CUDA_native_event_entry_t ) * NUM_EVENTS );
579  if ( cuda_native_table == NULL ) {
580  perror( "malloc(): Failed to allocate memory to events table" );
581  strncpy(_cuda_vector.cmp_info.disabled_reason, "Failed to allocate memory to events table.",PAPI_MAX_STR_LEN);
582  return ( PAPI_ENOSUPP );
583  }
584 
585  if ( NUM_EVENTS != createNativeEvents( ) ) {
586  strncpy(_cuda_vector.cmp_info.disabled_reason, "Error creating CUDA event list.",PAPI_MAX_STR_LEN);
587  return ( PAPI_ENOSUPP );
588  }
589 
590  /* Export the component id */
592 
593  /* Number of events */
595 
596  return ( PAPI_OK );
597 }
CUdevice dev
Definition: linux-cuda.h:68
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.h:26
char * getenv()
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define printf
Definition: papi_test.h:125
static int createNativeEvents(void)
Definition: linux-cuda.c:380
#define NUM_EVENTS
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
static CUcontext cuCtx
Definition: linux-cuda.h:156
static int currentDeviceID
Definition: linux-cuda.h:139
static int cidx
Definition: event_info.c:40
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
papi_vector_t _cuda_vector
Definition: linux-cuda.c:1160
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
Definition: linux-cuda.h:93
static int linkCudaLibraries()
Definition: linux-cuda.c:607
#define PAPI_MAX_STR_LEN
Definition: papi.h:463

Here is the call graph for this function:

int CUDA_init_control_state ( hwd_control_state_t *  ctrl)

Definition at line 795 of file linux-cuda.c.

796 {
797  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
798  CUptiResult cuptiErr = CUPTI_SUCCESS;
799  int i;
800 
801  /* allocate memory for the list of events that are added to the CuPTI eventGroup */
802  CUDA_ctrl->addedEvents.list = malloc( sizeof ( int ) * NUM_EVENTS );
803  if ( CUDA_ctrl->addedEvents.list == NULL ) {
804  perror
805  ( "malloc(): Failed to allocate memory to table of events that are added to CuPTI eventGroup" );
806  return ( PAPI_ENOSUPP );
807  }
808 
809  /* initialize the event list */
810  for ( i = 0; i < NUM_EVENTS; i++ )
811  CUDA_ctrl->addedEvents.list[i] = 0;
812 
813 
814 
815  cuptiErr = (*cuptiEventGroupCreatePtr)( cuCtx, &CUDA_ctrl->eventGroup, 0 );
816  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupCreate" );
817 
818  return PAPI_OK;
819 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
#define NUM_EVENTS
int i
Definition: fileop.c:140
static CUcontext cuCtx
Definition: linux-cuda.h:156
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_init_thread ( hwd_context_t *  ctx)

Definition at line 498 of file linux-cuda.c.

499 {
500  ( void ) ctx;
501 
502  return PAPI_OK;
503 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
int CUDA_ntv_code_to_bits ( unsigned int  EventCode,
hwd_register_t *  bits 
)

Definition at line 1145 of file linux-cuda.c.

1146 {
1147  int index = EventCode;
1148 
1149  memcpy( ( CUDA_register_t * ) bits,
1150  &( cuda_native_table[index].resources ),
1151  sizeof ( CUDA_register_t ) );
1152 
1153  return ( PAPI_OK );
1154 }
return PAPI_OK
Definition: linux-nvml.c:458
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
int CUDA_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
)

Definition at line 1132 of file linux-cuda.c.

1133 {
1134  int index = EventCode;
1135 
1136  strncpy( name, cuda_native_table[index].description, len );
1137  return ( PAPI_OK );
1138 }
return PAPI_OK
Definition: linux-nvml.c:458
char description[PAPI_MAX_STR_LEN]
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
char * name
Definition: iozone.c:23648
int CUDA_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Definition at line 1119 of file linux-cuda.c.

1120 {
1121  int index = EventCode;
1122 
1123  strncpy( name, cuda_native_table[index].name, len );
1124  return ( PAPI_OK );
1125 }
return PAPI_OK
Definition: linux-nvml.c:458
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
char * name
Definition: iozone.c:23648
int CUDA_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
)

Definition at line 1086 of file linux-cuda.c.

1087 {
1088 
1089  switch ( modifier ) {
1090  case PAPI_ENUM_FIRST:
1091  *EventCode = 0;
1092 
1093  return ( PAPI_OK );
1094  break;
1095 
1096  case PAPI_ENUM_EVENTS:
1097  {
1098  int index = *EventCode;
1099 
1100  if ( index < NUM_EVENTS - 1 ) {
1101  *EventCode = *EventCode + 1;
1102  return ( PAPI_OK );
1103  } else
1104  return ( PAPI_ENOEVNT );
1105 
1106  break;
1107  }
1108  default:
1109  return ( PAPI_EINVAL );
1110  }
1111  return ( PAPI_EINVAL );
1112 }
#define PAPI_ENOEVNT
Definition: papi.h:258
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
#define NUM_EVENTS
int CUDA_read ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl,
long_long **  events,
int  flags 
)

Definition at line 865 of file linux-cuda.c.

867 {
868  ( void ) ctx;
869  ( void ) flags;
870  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
871 
872 
873  if ( 0 != getEventValue( CUDA_ctrl->counts, CUDA_ctrl->eventGroup, CUDA_ctrl->addedEvents ) )
874  return ( PAPI_ENOSUPP );
875 
876  *events = CUDA_ctrl->counts;
877 
878  return ( PAPI_OK );
879 }
long long flags
Definition: iozone.c:12330
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
char events[MAX_EVENTS][BUFSIZ]
long long counts[CUDA_MAX_COUNTERS]
Definition: linux-cuda.h:111
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109
static int getEventValue(long long *counts, CUpti_EventGroup eventGroup, AddedEvents_t addedEvents)
Definition: linux-cuda.c:446

Here is the call graph for this function:

int CUDA_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
)

Definition at line 1042 of file linux-cuda.c.

1043 {
1044  ( void ) ctx;
1045  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
1046  CUptiResult cuptiErr = CUPTI_SUCCESS;
1047 
1048  /* Resets all events in the CuPTI eventGroup to zero */
1049  cuptiErr = (*cuptiEventGroupResetAllEventsPtr)( CUDA_ctrl->eventGroup );
1050  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupResetAllEvents" );
1051 
1052  return ( PAPI_OK );
1053 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_set_domain ( hwd_control_state_t *  cntrl,
int  domain 
)

Definition at line 1017 of file linux-cuda.c.

1018 {
1019  int found = 0;
1020  ( void ) cntrl;
1021 
1022  if ( PAPI_DOM_USER & domain )
1023  found = 1;
1024 
1025  if ( PAPI_DOM_KERNEL & domain )
1026  found = 1;
1027 
1028  if ( PAPI_DOM_OTHER & domain )
1029  found = 1;
1030 
1031  if ( !found )
1032  return ( PAPI_EINVAL );
1033 
1034  return ( PAPI_OK );
1035 }
#define PAPI_DOM_KERNEL
Definition: papi.h:298
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_DOM_USER
Definition: papi.h:296
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
long long found
Definition: libasync.c:735
#define PAPI_DOM_OTHER
Definition: papi.h:299
int CUDA_shutdown_component ( void  )

Definition at line 896 of file linux-cuda.c.

897 {
898  CUresult cuErr = CUDA_SUCCESS;
899 
900  /* if running a threaded application, we need to make sure that
901  a thread doesn't free the same memory location(s) more than once */
902  if ( CUDA_FREED == 0 ) {
903  uint32_t j;
904  int i;
905 
906  CUDA_FREED = 1;
907 
908  /* deallocate all the memory */
909  for ( i = 0; i < deviceCount; i++ ) {
910  for ( j = 0; j < device[i].domainCount; j++ )
911  free( device[i].domain[j].event );
912 
913  free( device[i].domain );
914  }
915 
916  free( device );
918 
919  /* destroy floating CUDA context */
920  cuErr = (*cuCtxDestroyPtr)( cuCtx );
921  if ( cuErr != CUDA_SUCCESS )
922  return ( PAPI_ENOSUPP ); // Not supported
923  }
924 
925  // close the dynamic libraries needed by this component (opened in the init substrate call)
926  dlclose(dl1);
927  dlclose(dl2);
928  dlclose(dl3);
929 
930  return ( PAPI_OK );
931 }
#define PAPI_ENOSUPP
Definition: papi.h:269
static int deviceCount
Definition: linux-cuda.h:136
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
return PAPI_OK
Definition: linux-nvml.c:458
int i
Definition: fileop.c:140
static CUcontext cuCtx
Definition: linux-cuda.h:156
free(dummyfile[xx])
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
uint32_t domainCount
Definition: linux-cuda.h:70
static DeviceData_t * device
Definition: linux-cuda.h:155
long j
Definition: iozone.c:19135
static int CUDA_FREED
Definition: linux-cuda.h:140

Here is the call graph for this function:

int CUDA_shutdown_thread ( hwd_context_t *  ctx)

Definition at line 885 of file linux-cuda.c.

886 {
887  CUDA_context_t *CUDA_ctx = (CUDA_context_t*)ctx;
888  free( CUDA_ctx->state.addedEvents.list );
889  return (PAPI_OK);
890 }
CUDA_control_state_t state
Definition: linux-cuda.h:118
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
return PAPI_OK
Definition: linux-nvml.c:458
free(dummyfile[xx])

Here is the call graph for this function:

int CUDA_start ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
)

Definition at line 826 of file linux-cuda.c.

827 {
828  ( void ) ctx;
829  int i;
830  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
831  CUptiResult cuptiErr = CUPTI_SUCCESS;
832 
833  // reset all event values to 0
834  for ( i = 0; i < NUM_EVENTS; i++ )
835  CUDA_ctrl->counts[i] = 0;
836 
837  cuptiErr = (*cuptiEventGroupEnablePtr)( CUDA_ctrl->eventGroup );
838  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupEnable" );
839 
840  /* Resets all events in the CuPTI eventGroup to zero */
841  cuptiErr = (*cuptiEventGroupResetAllEventsPtr)( CUDA_ctrl->eventGroup );
842  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupResetAllEvents" );
843 
844  return ( PAPI_OK );
845 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define NUM_EVENTS
int i
Definition: fileop.c:140
long long counts[CUDA_MAX_COUNTERS]
Definition: linux-cuda.h:111
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
)

Definition at line 852 of file linux-cuda.c.

853 {
854  ( void ) ctx;
855  ( void ) ctrl;
856 
857  return ( PAPI_OK );
858 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
int CUDA_update_control_state ( hwd_control_state_t *  ptr,
NativeInfo_t *  native,
int  count,
hwd_context_t *  ctx 
)

Definition at line 955 of file linux-cuda.c.

958 {
959  ( void ) ctx;
960  CUDA_control_state_t * CUDA_ptr = ( CUDA_control_state_t * ) ptr;
961  int index, i;
962  CUptiResult cuptiErr = CUPTI_SUCCESS;
963 
964  /* Disable the CUDA eventGroup;
965  it also frees the perfmon hardware on the GPU */
966  cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ptr->eventGroup );
967  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );
968 
969  cuptiErr = (*cuptiEventGroupRemoveAllEventsPtr)( CUDA_ptr->eventGroup );
970  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupRemoveAllEvents" );
971 
972  // otherwise, add the events to the eventset
973  for ( i = 0; i < count; i++ ) {
974 
975  index = native[i].ni_event;
976  native[i].ni_position = index;
977 
978  /* store events that have been added to the CuPTI eventGroup
979  in a separate place (addedEvents).
980  Needed, so that we can read the values for the added events only */
981  CUDA_ptr->addedEvents.count = count;
982  CUDA_ptr->addedEvents.list[i] = index;
983 
984  /* if this device name is different from the actual device the code is running on, then exit */
985  if ( 0 != strncmp( device[currentDeviceID].name,
986  cuda_native_table[index].name,
987  strlen( device[currentDeviceID].name ) ) ) {
988  fprintf( stderr, "Device %s is used -- BUT event %s is collected. \n ---> ERROR: Specify events for the device that is used!\n\n",
989  device[currentDeviceID].name, cuda_native_table[index].name );
990 
991  return ( PAPI_ENOSUPP ); // Not supported
992  }
993 
994  /* Add events to the CuPTI eventGroup */
995  cuptiErr =
996  (*cuptiEventGroupAddEventPtr)( CUDA_ptr->eventGroup,
998  eventId );
999  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupAddEvent" );
1000  }
1001 
1002  return ( PAPI_OK );
1003 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
void
Definition: iozone.c:18627
int i
Definition: fileop.c:140
static int currentDeviceID
Definition: linux-cuda.h:139
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
CUpti_EventID * eventId
Definition: linux-cuda.c:291
CUDA_register_t resources
Definition: linux-cuda.h:95
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

static int enumEventDomains ( CUdevice  dev,
int  deviceId 
)
static

Definition at line 185 of file linux-cuda.c.

186 {
187  CUptiResult err = CUPTI_SUCCESS;
188  CUpti_EventDomainID *domainId = NULL;
189  uint32_t id = 0;
190  size_t size = 0;
191 
192  device[deviceId].domainCount = 0;
193 
194  /* get number of domains for device dev */
195  err = (*cuptiDeviceGetNumEventDomainsPtr)( dev, &device[deviceId].domainCount );
196  CHECK_CUPTI_ERROR( err, "cuptiDeviceGetNumEventDomains" );
197 
198  if ( device[deviceId].domainCount == 0 ) {
199  printf( "No domain is exposed by dev = %d\n", dev );
200  return -1;
201  }
202 
203  /* CuPTI domain struct */
204  size = sizeof ( CUpti_EventDomainID ) * device[deviceId].domainCount;
205  domainId = ( CUpti_EventDomainID * ) malloc( size );
206  if ( domainId == NULL ) {
207  perror( "malloc(): Failed to allocate memory to CuPTI domain ID" );
208  return -1;
209  }
210  memset( domainId, 0, size );
211 
212  /* PAPI domain struct */
213  device[deviceId].domain =
214  ( DomainData_t * ) malloc( sizeof ( DomainData_t ) *
215  device[deviceId].domainCount );
216  if ( device[deviceId].domain == NULL ) {
217  perror( "malloc(): Failed to allocate memory to PAPI domain struct" );
218  free(domainId);
219  return -1;
220  }
221 
222  /* Enumerates the event domains for a device dev */
223  err = (*cuptiDeviceEnumEventDomainsPtr)( dev, &size, domainId );
224  CHECK_CUPTI_ERROR( err, "cuptiDeviceEnumEventDomains" );
225 
226  /* enum domains */
227  for ( id = 0; id < device[deviceId].domainCount; id++ ) {
228  device[deviceId].domain[id].domainId = domainId[id];
229 
230  /* query domain name */
231  size = PAPI_MIN_STR_LEN;
232 #ifdef CUDA_4_0
233  err = cuptiEventDomainGetAttribute( dev,
234  device[deviceId].domain[id].
235  domainId,
236  CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
237  ( void * ) device[deviceId].
238  domain[id].name );
239  CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );
240 
241  /* query num of events available in the domain */
242  size = sizeof ( device[deviceId].domain[id].eventCount );
243  err = cuptiEventDomainGetAttribute( dev,
244  device[deviceId].domain[id].
245  domainId,
246  CUPTI_EVENT_DOMAIN_MAX_EVENTS,
247  &size,
248  ( void * ) &device[deviceId].
249  domain[id].eventCount );
250  CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );
251 
252  /* enumerate the events for the domain[id] on the device dev */
253  if ( 0 != enumEvents( dev, deviceId, id ) )
254  return -1;
255 #else
256  err = (*cuptiDeviceGetEventDomainAttributePtr)( dev,
257  device[deviceId].domain[id].domainId,
258  CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
259  ( void * ) device[deviceId].domain[id].name );
260  CHECK_CUPTI_ERROR( err, "cuptiDeviceGetEventDomainAttribute" );
261 
262  /* query num of events available in the domain */
263  err = (*cuptiEventDomainGetNumEventsPtr)( device[deviceId].domain[id].domainId,
264  &device[deviceId].domain[id].eventCount );
265  CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetNumEvents" );
266 
267  /* enumerate the events for the domain[id] on the device deviceId */
268  if ( 0 != enumEvents( deviceId, id ) )
269  return -1;
270 #endif
271  }
272 
273  totalDomainCount += device[deviceId].domainCount;
274  free( domainId );
275  return 0;
276 }
memset(eventId, 0, size)
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
static int enumEvents(int domainId, int eventCount)
#define printf
Definition: papi_test.h:125
err
Definition: linux-cuda.c:323
char *long long size
Definition: iozone.c:12023
free(dummyfile[xx])
uint32_t eventCount
Definition: linux-cuda.h:61
uint32_t domainCount
Definition: linux-cuda.h:70
uint32_t id
Definition: linux-cuda.c:293
DomainData_t * domain
Definition: linux-cuda.h:71
static int totalDomainCount
Definition: linux-cuda.h:137
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
CUpti_EventDomainID domainId
Definition: linux-cuda.h:59

Here is the call graph for this function:

for (id=0;id< device[deviceId].domain[domainId].eventCount;id++)

Definition at line 330 of file linux-cuda.c.

330  {
331  device[deviceId].domain[domainId].event[id].eventId = eventId[id];
332 
333  /* query event name */
335 #ifdef CUDA_4_0
336  err = (*cuptiEventGetAttributePtr)( dev,
337  device[deviceId].domain[domainId].
338  event[id].eventId, CUPTI_EVENT_ATTR_NAME,
339  &size,
340  ( uint8_t * ) device[deviceId].
341  domain[domainId].event[id].name );
342 #else
343  err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
344  event[id].eventId, CUPTI_EVENT_ATTR_NAME,
345  &size,
346  ( uint8_t * ) device[deviceId].
347  domain[domainId].event[id].name );
348 #endif
349  CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
350 
351  /* query event description */
353 #ifdef CUDA_4_0
354  err = (*cuptiEventGetAttributePtr)( dev,
355  device[deviceId].domain[domainId].
356  event[id].eventId,
357  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
358  ( uint8_t * ) device[deviceId].
359  domain[domainId].event[id].desc );
360 #else
361  err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
362  event[id].eventId,
363  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
364  ( uint8_t * ) device[deviceId].
365  domain[domainId].event[id].desc );
366 #endif
367  CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
368  }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
CUpti_EventID eventId
Definition: linux-cuda.h:51
err
Definition: linux-cuda.c:323
#define PAPI_2MAX_STR_LEN
Definition: papi.h:464
char *long long size
Definition: iozone.c:12023
EventData_t * event
Definition: linux-cuda.h:62
uint32_t id
Definition: linux-cuda.c:293
DomainData_t * domain
Definition: linux-cuda.h:71
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
CUpti_EventID * eventId
Definition: linux-cuda.c:291

Here is the call graph for this function:

free ( eventId  )
static int getEventValue ( long long *  counts,
CUpti_EventGroup  eventGroup,
AddedEvents_t  addedEvents 
)
static

Definition at line 446 of file linux-cuda.c.

447 {
448  CUptiResult cuptiErr = CUPTI_SUCCESS;
449  size_t events_read, bufferSizeBytes, arraySizeBytes, i;
450  uint64_t *counterDataBuffer;
451  CUpti_EventID *eventIDArray;
452  int j;
453 
454  bufferSizeBytes = addedEvents.count * sizeof ( uint64_t );
455  counterDataBuffer = ( uint64_t * ) malloc( bufferSizeBytes );
456 
457  arraySizeBytes = addedEvents.count * sizeof ( CUpti_EventID );
458  eventIDArray = ( CUpti_EventID * ) malloc( arraySizeBytes );
459 
460  /* read counter data for the specified event from the CuPTI eventGroup */
461  cuptiErr = (*cuptiEventGroupReadAllEventsPtr)( eventGroup,
462  CUPTI_EVENT_READ_FLAG_NONE,
463  &bufferSizeBytes,
464  counterDataBuffer, &arraySizeBytes,
465  eventIDArray, &events_read );
466  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupReadAllEvents" );
467 
468  if ( events_read != ( size_t ) addedEvents.count )
469  return -1;
470 
471  /* Since there is no guarantee that returned counter values are in the same
472  order as the counters in the PAPI addedEvents.list, we need to map the
473  CUpti_EventID to PAPI event ID values.
474  According to CuPTI doc: counter return values of counterDataBuffer
475  correspond to the return event IDs in eventIDArray */
476  for ( i = 0; i < events_read; i++ )
477  for ( j = 0; j < addedEvents.count; j++ )
478  if ( cuda_native_table[addedEvents.list[j]].resources.eventId ==
479  eventIDArray[i] )
480  // since cuptiEventGroupReadAllEvents() resets counter values to 0;
481  // we have to accumulate ourselves
482  counts[addedEvents.list[j]] = counts[addedEvents.list[j]] + counterDataBuffer[i];
483 
484  free( counterDataBuffer );
485  free( eventIDArray );
486  return 0;
487 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
CUpti_EventID eventId
Definition: linux-cuda.h:88
int i
Definition: fileop.c:140
free(dummyfile[xx])
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
long j
Definition: iozone.c:19135
CUDA_register_t resources
Definition: linux-cuda.h:95

Here is the call graph for this function:

Here is the caller graph for this function:

if ( eventId == NULL )

Definition at line 299 of file linux-cuda.c.

299  {
300  perror( "malloc(): Failed to allocate memory to CuPTI event ID" );
301  return -1;
302  }
if ( device[deviceId].domain[domainId].event == NULL )

Definition at line 310 of file linux-cuda.c.

310  {
311  perror( "malloc(): Failed to allocate memory to PAPI event struct" );
312  free(eventId);
313  return -1;
314  }
free(dummyfile[xx])
CUpti_EventID * eventId
Definition: linux-cuda.c:291

Here is the call graph for this function:

static int linkCudaLibraries ( )
static

Definition at line 607 of file linux-cuda.c.

608 {
609  /* Attempt to guess if we were statically linked to libc, if so bail */
610  if ( _dl_non_dynamic_init != NULL ) {
611  strncpy(_cuda_vector.cmp_info.disabled_reason, "The cuda component does not support statically linking to libc.",PAPI_MAX_STR_LEN);
612  return PAPI_ENOSUPP;
613  }
614  /* Need to link in the cuda libraries, if not found disable the component */
615  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
616  if (!dl1)
617  {
618  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
619  return ( PAPI_ENOSUPP );
620  }
621  cuCtxCreatePtr = dlsym(dl1, "cuCtxCreate_v2");
622  if (dlerror() != NULL)
623  {
624  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuCtxCreate not found.",PAPI_MAX_STR_LEN);
625  return ( PAPI_ENOSUPP );
626  }
627  cuCtxDestroyPtr = dlsym(dl1, "cuCtxDestroy_v2");
628  if (dlerror() != NULL)
629  {
630  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuCtxDestroy not found.",PAPI_MAX_STR_LEN);
631  return ( PAPI_ENOSUPP );
632  }
633  cuCtxGetCurrentPtr = dlsym(dl1, "cuCtxGetCurrent");
634  if (dlerror() != NULL)
635  {
636  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuCtxGetCurrent not found.",PAPI_MAX_STR_LEN);
637  return ( PAPI_ENOSUPP );
638  }
639  cuDeviceGetPtr = dlsym(dl1, "cuDeviceGet");
640  if (dlerror() != NULL)
641  {
642  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuDeviceGet not found.",PAPI_MAX_STR_LEN);
643  return ( PAPI_ENOSUPP );
644  }
645  cuDeviceGetCountPtr = dlsym(dl1, "cuDeviceGetCount");
646  if (dlerror() != NULL)
647  {
648  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuDeviceGetCount not found.",PAPI_MAX_STR_LEN);
649  return ( PAPI_ENOSUPP );
650  }
651  cuDeviceGetNamePtr = dlsym(dl1, "cuDeviceGetName");
652  if (dlerror() != NULL)
653  {
654  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuDeviceGetName not found.",PAPI_MAX_STR_LEN);
655  return ( PAPI_ENOSUPP );
656  }
657  cuInitPtr = dlsym(dl1, "cuInit");
658  if (dlerror() != NULL)
659  {
660  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
661  return ( PAPI_ENOSUPP );
662  }
663 
664  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
665  if (!dl2)
666  {
667  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
668  return ( PAPI_ENOSUPP );
669  }
670  cudaFreePtr = dlsym(dl2, "cudaFree");
671  if (dlerror() != NULL)
672  {
673  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaFree not found.",PAPI_MAX_STR_LEN);
674  return ( PAPI_ENOSUPP );
675  }
676  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
677  if (dlerror() != NULL)
678  {
679  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
680  return ( PAPI_ENOSUPP );
681  }
682  cudaRuntimeGetVersionPtr = dlsym(dl2, "cudaRuntimeGetVersion");
683  if (dlerror() != NULL)
684  {
685  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaRuntimeGetVersion not found.",PAPI_MAX_STR_LEN);
686  return ( PAPI_ENOSUPP );
687  }
688  cudaDriverGetVersionPtr = dlsym(dl2, "cudaDriverGetVersion");
689  if (dlerror() != NULL)
690  {
691  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaDriverGetVersion not found.",PAPI_MAX_STR_LEN);
692  return ( PAPI_ENOSUPP );
693  }
694 
695  dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL);
696  if (!dl3)
697  {
698  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA runtime library libcupti.so not found.",PAPI_MAX_STR_LEN);
699  return ( PAPI_ENOSUPP );
700  }
701  cuptiDeviceEnumEventDomainsPtr = dlsym(dl3, "cuptiDeviceEnumEventDomains");
702  if (dlerror() != NULL)
703  {
704  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiDeviceEnumEventDomains not found.",PAPI_MAX_STR_LEN);
705  return ( PAPI_ENOSUPP );
706  }
707  cuptiDeviceGetEventDomainAttributePtr = dlsym(dl3, "cuptiDeviceGetEventDomainAttribute");
708  if (dlerror() != NULL)
709  {
710  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiDeviceGetEventDomainAttribute not found.",PAPI_MAX_STR_LEN);
711  return ( PAPI_ENOSUPP );
712  }
713  cuptiDeviceGetNumEventDomainsPtr = dlsym(dl3, "cuptiDeviceGetNumEventDomains");
714  if (dlerror() != NULL)
715  {
716  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiDeviceGetNumEventDomains not found.",PAPI_MAX_STR_LEN);
717  return ( PAPI_ENOSUPP );
718  }
719  cuptiEventDomainEnumEventsPtr = dlsym(dl3, "cuptiEventDomainEnumEvents");
720  if (dlerror() != NULL)
721  {
722  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventDomainEnumEvents not found.",PAPI_MAX_STR_LEN);
723  return ( PAPI_ENOSUPP );
724  }
725  cuptiEventDomainGetNumEventsPtr = dlsym(dl3, "cuptiEventDomainGetNumEvents");
726  if (dlerror() != NULL)
727  {
728  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventDomainGetNumEvents not found.",PAPI_MAX_STR_LEN);
729  return ( PAPI_ENOSUPP );
730  }
731  cuptiEventGetAttributePtr = dlsym(dl3, "cuptiEventGetAttribute");
732  if (dlerror() != NULL)
733  {
734  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGetAttribute not found.",PAPI_MAX_STR_LEN);
735  return ( PAPI_ENOSUPP );
736  }
737  cuptiEventGroupAddEventPtr = dlsym(dl3, "cuptiEventGroupAddEvent");
738  if (dlerror() != NULL)
739  {
740  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupAddEvent not found.",PAPI_MAX_STR_LEN);
741  return ( PAPI_ENOSUPP );
742  }
743  cuptiEventGroupCreatePtr = dlsym(dl3, "cuptiEventGroupCreate");
744  if (dlerror() != NULL)
745  {
746  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupCreate not found.",PAPI_MAX_STR_LEN);
747  return ( PAPI_ENOSUPP );
748  }
749  cuptiEventGroupDestroyPtr = dlsym(dl3, "cuptiEventGroupDestroy");
750  if (dlerror() != NULL)
751  {
752  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupDestroy not found.",PAPI_MAX_STR_LEN);
753  return ( PAPI_ENOSUPP );
754  }
755  cuptiEventGroupDisablePtr = dlsym(dl3, "cuptiEventGroupDisable");
756  if (dlerror() != NULL)
757  {
758  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupDisable not found.",PAPI_MAX_STR_LEN);
759  return ( PAPI_ENOSUPP );
760  }
761  cuptiEventGroupEnablePtr = dlsym(dl3, "cuptiEventGroupEnable");
762  if (dlerror() != NULL)
763  {
764  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupEnable not found.",PAPI_MAX_STR_LEN);
765  return ( PAPI_ENOSUPP );
766  }
767  cuptiEventGroupReadAllEventsPtr = dlsym(dl3, "cuptiEventGroupReadAllEvents");
768  if (dlerror() != NULL)
769  {
770  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupReadAllEvents not found.",PAPI_MAX_STR_LEN);
771  return ( PAPI_ENOSUPP );
772  }
773  cuptiEventGroupRemoveAllEventsPtr = dlsym(dl3, "cuptiEventGroupRemoveAllEvents");
774  if (dlerror() != NULL)
775  {
776  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupRemoveAllEvents not found.",PAPI_MAX_STR_LEN);
777  return ( PAPI_ENOSUPP );
778  }
779  cuptiEventGroupResetAllEventsPtr = dlsym(dl3, "cuptiEventGroupResetAllEvents");
780  if (dlerror() != NULL)
781  {
782  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupResetAllEvents not found.",PAPI_MAX_STR_LEN);
783  return ( PAPI_ENOSUPP );
784  }
785 
786  return ( PAPI_OK );
787 }
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
papi_vector_t _cuda_vector
Definition: linux-cuda.c:1160
#define PAPI_MAX_STR_LEN
Definition: papi.h:463

Here is the caller graph for this function:

memset ( eventId,
0,
size
)

Variable Documentation

papi_vector_t _cuda_vector

Definition at line 1160 of file linux-cuda.c.

void(* _dl_non_dynamic_init)(void)

Definition at line 41 of file linux-cuda.c.

122 {
123  CUresult err;
124  int skipDevice = 0;
125  int id;
126  char deviceName_tmp[PAPI_MIN_STR_LEN] = "init";
127 
128  totalEventCount = 0;
129 
130 /* CUDA initialization */
131  err = (*cuInitPtr)( 0 );
132  if ( err != CUDA_SUCCESS ) {
133  SUBDBG ("Info: Error from cuInit(): %d\n", err);
134  return ( PAPI_ENOSUPP );
135  }
136 
137  /* How many gpgpu devices do we have? */
138  err = (*cuDeviceGetCountPtr)( &deviceCount );
139  CHECK_CU_ERROR( err, "cuDeviceGetCount" );
140  if ( deviceCount == 0 )
141  return ( PAPI_ENOSUPP );
142 
143  /* allocate memory for device data table */
144  device = ( DeviceData_t * ) malloc( sizeof ( DeviceData_t ) * deviceCount );
145  if ( device == NULL ) {
146  perror( "malloc(): Failed to allocate memory to CUDA device table" );
147  return ( PAPI_ENOSUPP );
148  }
149 
150  /* What are the devices? Get Name and # of domains per device */
151  for ( id = 0; id < deviceCount; id++ ) {
152  err = (*cuDeviceGetPtr)( &device[id].dev, id );
153  CHECK_CU_ERROR( err, "cuDeviceGet" );
154 
155  err = (*cuDeviceGetNamePtr)( device[id].name, PAPI_MIN_STR_LEN, device[id].dev );
156  CHECK_CU_ERROR( err, "cuDeviceGetName" );
157 
158  SUBDBG ("Cuda deviceName: %s\n", device[id].name);
159 
160  /* Skip device if there are multiple of the same type
161  and if it has been already added to the list */
162  if ( 0 == strcmp( deviceName_tmp, device[id].name ) ) {
163  skipDevice++;
164  continue;
165  }
166 
167  strcpy( deviceName_tmp, device[id].name );
168 
169  /* enumerate the domains on the device */
170  if ( 0 != enumEventDomains( device[id].dev, id ) )
171  return ( PAPI_ENOSUPP );
172  }
173 
174  deviceCount = deviceCount - skipDevice;
175 
176  /* return number of events provided via CuPTI */
177  return totalEventCount;
178 }
CUdevice dev
Definition: linux-cuda.h:68
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.h:26
#define PAPI_ENOSUPP
Definition: papi.h:269
static int deviceCount
Definition: linux-cuda.h:136
totalEventCount
Definition: linux-cuda.c:370
err
Definition: linux-cuda.c:323
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
uint32_t id
Definition: linux-cuda.c:293
static int enumEventDomains(CUdevice dev, int deviceId)
Definition: linux-cuda.c:185
strcpy(filename, default_filename)
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
char name[PAPI_MIN_STR_LEN]
Definition: linux-cuda.h:69
err
Initial value:
=
(*cuptiEventDomainEnumEventsPtr)( ( CUpti_EventDomainID ) device[deviceId].
domain[domainId].domainId, &size, eventId )
char *long long size
Definition: iozone.c:12023
static DeviceData_t * device
Definition: linux-cuda.h:155
CUpti_EventID * eventId
Definition: linux-cuda.c:291

Definition at line 323 of file linux-cuda.c.

device[deviceId].domain[domainId].event
Initial value:
=
( EventData_t * ) malloc( sizeof ( EventData_t ) *
device[deviceId].domain[domainId].
eventCount )
DomainData_t * domain
Definition: linux-cuda.h:71
static DeviceData_t * device
Definition: linux-cuda.h:155

Definition at line 306 of file linux-cuda.c.

eventId = NULL

Definition at line 291 of file linux-cuda.c.

uint32_t id = 0

Definition at line 293 of file linux-cuda.c.

int
static
Initial value:
{
CUptiResult err = CUPTI_SUCCESS
err
Definition: linux-cuda.c:323

Definition at line 289 of file linux-cuda.c.

return

Definition at line 372 of file linux-cuda.c.

size = 0

Definition at line 292 of file linux-cuda.c.

totalEventCount = device[deviceId].domain[domainId].eventCount

Definition at line 370 of file linux-cuda.c.