PAPI  5.3.2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-cuda.c File Reference

This file has the source code for a component that enables PAPI-C to access hardware monitoring counters for GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Macros

#define CUDAAPI   __attribute__((weak))
 
#define CUDARTAPI   __attribute__((weak))
 
#define CUPTIAPI   __attribute__((weak))
 

Functions

static int enumEventDomains (CUdevice dev, int deviceId)
 
 if (eventId==NULL)
 
 memset (eventId, 0, size)
 
 if (device[deviceId].domain[domainId].event==NULL)
 
 CHECK_CUPTI_ERROR (err,"cuptiEventDomainEnumEvents")
 
 for (id=0;id< device[deviceId].domain[domainId].eventCount;id++)
 
 free (eventId)
 
static int createNativeEvents (void)
 
static int getEventValue (long long *counts, CUpti_EventGroup eventGroup, AddedEvents_t addedEvents)
 
int CUDA_init_thread (hwd_context_t *ctx)
 
int CUDA_init_component (int cidx)
 
static int linkCudaLibraries ()
 
int CUDA_init_control_state (hwd_control_state_t *ctrl)
 
int CUDA_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int CUDA_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int CUDA_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long_long **events, int flags)
 
int CUDA_shutdown_thread (hwd_context_t *ctx)
 
int CUDA_shutdown_component (void)
 
int CUDA_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
int CUDA_update_control_state (hwd_control_state_t *ptr, NativeInfo_t *native, int count, hwd_context_t *ctx)
 
int CUDA_set_domain (hwd_control_state_t *cntrl, int domain)
 
int CUDA_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int CUDA_cleanup_eventset (hwd_control_state_t *ctrl)
 
int CUDA_ntv_enum_events (unsigned int *EventCode, int modifier)
 
int CUDA_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
int CUDA_ntv_code_to_descr (unsigned int EventCode, char *name, int len)
 
int CUDA_ntv_code_to_bits (unsigned int EventCode, hwd_register_t *bits)
 

Variables

void(* _dl_non_dynamic_init )(void)
 
static int
 
CUpti_EventID * eventId = NULL
 
size_t size = 0
 
uint32_t id = 0
 
device[deviceId] domain[domainId] event
 
 err
 
 totalEventCount = device[deviceId].domain[domainId].eventCount
 
 return
 
papi_vector_t _cuda_vector
 

Detailed Description

Author
Heike Jagode (in collaboration with Robert Dietrich, TU Dresden) jagode@eecs.utk.edu

Definition in file linux-cuda.c.

Macro Definition Documentation

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))

Function Documentation

CHECK_CUPTI_ERROR ( err  ,
"cuptiEventDomainEnumEvents"   
)

Here is the caller graph for this function:

static int createNativeEvents ( void  )
static

Definition at line 380 of file linux-cuda.c.

381 {
382  int deviceId, id = 0;
383  uint32_t domainId, eventId;
384  int cuptiDomainId;
385  int i;
386  int devNameLen;
387 
388  /* create events for every GPU device and every domain per device */
389  for ( deviceId = 0; deviceId < deviceCount; deviceId++ ) {
390  /* for the event names, replace blanks in the device name with underscores */
391  devNameLen = strlen( device[deviceId].name );
392  for ( i = 0; i < devNameLen; i++ )
393  if ( device[deviceId].name[i] == ' ' )
394  device[deviceId].name[i] = '_';
395 
396  for ( domainId = 0; domainId < device[deviceId].domainCount;
397  domainId++ ) {
398  cuptiDomainId = device[deviceId].domain[domainId].domainId;
399 
400  for ( eventId = 0;
401  eventId < device[deviceId].domain[domainId].eventCount;
402  eventId++ ) {
403  /* Save native event data */
405  "%s:%s:%s",
406  device[deviceId].name,
407  device[deviceId].domain[domainId].name,
408  device[deviceId].domain[domainId].event[eventId].
409  name );
410 
411  strncpy( cuda_native_table[id].description,
412  device[deviceId].domain[domainId].event[eventId].desc,
414 
415  /* The selector has to be !=0 . Starts with 1 */
417 
418  /* store event ID */
420  device[deviceId].domain[domainId].event[eventId].eventId;
421 
422  /* increment the table index counter */
423  id++;
424  }
425  }
426  }
427 
428  /* Return the number of events created */
429  return id;
430 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
static int deviceCount
Definition: linux-cuda.h:136
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
CUpti_EventID eventId
Definition: linux-cuda.h:51
CUpti_EventID eventId
Definition: linux-cuda.h:88
#define PAPI_2MAX_STR_LEN
Definition: papi.h:464
int i
Definition: fileop.c:140
uint32_t eventCount
Definition: linux-cuda.h:61
char description[PAPI_MAX_STR_LEN]
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
EventData_t * event
Definition: linux-cuda.h:62
uint32_t domainCount
Definition: linux-cuda.h:70
uint32_t id
Definition: linux-cuda.c:293
DomainData_t * domain
Definition: linux-cuda.h:71
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
char name[PAPI_MIN_STR_LEN]
Definition: linux-cuda.h:69
CUpti_EventDomainID domainId
Definition: linux-cuda.h:59
CUpti_EventID * eventId
Definition: linux-cuda.c:291
CUDA_register_t resources
Definition: linux-cuda.h:95
unsigned int selector
Definition: linux-cuda.h:86

Here is the call graph for this function:

Here is the caller graph for this function:

int CUDA_cleanup_eventset ( hwd_control_state_t ctrl)

Definition at line 1050 of file linux-cuda.c.

1051 {
1052  ( void ) ctrl;
1053 
1054  // TODO: after cleanup_eventset() which destroys the eventset, update_control_state()
1055  // is called, which operates on the already destroyed eventset. Bad!
1056 #if 0
1057  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
1058  CUptiResult cuptiErr = CUPTI_SUCCESS;
1059 
1060  /* Disable the CUDA eventGroup;
1061  it also frees the perfmon hardware on the GPU */
1062  cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ctrl->eventGroup );
1063  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );
1064 
1065  /* Call the CuPTI cleaning function before leaving */
1066  cuptiErr = (*cuptiEventGroupDestroyPtr)( CUDA_ctrl->eventGroup );
1067  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" );
1068 #endif
1069  return ( PAPI_OK );
1070 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_ctl ( hwd_context_t ctx,
int  code,
_papi_int_option_t option 
)

Definition at line 930 of file linux-cuda.c.

931 {
932  ( void ) ctx;
933  ( void ) code;
934  ( void ) option;
935  return ( PAPI_OK );
936 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
int CUDA_init_component ( int  cidx)

Definition at line 513 of file linux-cuda.c.

514 {
515  SUBDBG ("Entry: cidx: %d\n", cidx);
516  CUresult cuErr = CUDA_SUCCESS;
517 
518  /* link in all the cuda libraries and resolve the symbols we need to use */
519  if (linkCudaLibraries() != PAPI_OK) {
520  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
521  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
522  return (PAPI_ENOSUPP);
523  }
524 
525  /* Create dynamic event table */
526  NUM_EVENTS = detectDevice( );
527  if (NUM_EVENTS < 0) {
528  strncpy(_cuda_vector.cmp_info.disabled_reason, "Call to detectDevice failed.",PAPI_MAX_STR_LEN);
529  return (PAPI_ENOSUPP);
530  }
531  /* TODO: works only for one device right now;
532  need to find out if user can use 2 or more devices at same time */
533 
534  /* want to create a CUDA context for either the default device or
535  the device specified with cudaSetDevice() in user code */
536  if ( CUDA_SUCCESS != (*cudaGetDevicePtr)( &currentDeviceID ) ) {
537  strncpy(_cuda_vector.cmp_info.disabled_reason, "No NVIDIA GPU's found.",PAPI_MAX_STR_LEN);
538  return ( PAPI_ENOSUPP );
539  }
540 
541  if ( getenv( "PAPI_VERBOSE" ) ) {
542  printf( "DEVICE USED: %s (%d)\n", device[currentDeviceID].name,
543  currentDeviceID );
544  }
545 
546  /* get the CUDA context from the calling CPU thread */
547  cuErr = (*cuCtxGetCurrentPtr)( &cuCtx );
548 
549  /* if no CUDA context is bound to the calling CPU thread yet, create one */
550  if ( cuErr != CUDA_SUCCESS || cuCtx == NULL ) {
551  cuErr = (*cuCtxCreatePtr)( &cuCtx, 0, device[currentDeviceID].dev );
552  CHECK_CU_ERROR( cuErr, "cuCtxCreate" );
553  }
554 
555  /* cuCtxGetCurrent() can return a non-null context that is not valid
556  because the context has not yet been initialized.
557  Here is a workaround:
558  cudaFree(NULL) forces the context to be initialized
559  if cudaFree(NULL) returns success then we are able to use the context in subsequent calls
560  if cudaFree(NULL) returns an error (or subsequent cupti* calls) then the context is not usable,
561  and will never be usable */
562  if ( CUDA_SUCCESS != (*cudaFreePtr)( NULL ) ) {
563  strncpy(_cuda_vector.cmp_info.disabled_reason, "Problem initializing CUDA context.",PAPI_MAX_STR_LEN);
564  return ( PAPI_ENOSUPP );
565  }
566 
567  /* Create dynamic event table */
569  malloc( sizeof ( CUDA_native_event_entry_t ) * NUM_EVENTS );
570  if ( cuda_native_table == NULL ) {
571  perror( "malloc(): Failed to allocate memory to events table" );
572  strncpy(_cuda_vector.cmp_info.disabled_reason, "Failed to allocate memory to events table.",PAPI_MAX_STR_LEN);
573  return ( PAPI_ENOSUPP );
574  }
575 
576  if ( NUM_EVENTS != createNativeEvents( ) ) {
577  strncpy(_cuda_vector.cmp_info.disabled_reason, "Error creating CUDA event list.",PAPI_MAX_STR_LEN);
578  return ( PAPI_ENOSUPP );
579  }
580 
581  /* Export the component id */
583 
584  /* Number of events */
586 
587  return ( PAPI_OK );
588 }
CUdevice dev
Definition: linux-cuda.h:68
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.h:26
char * getenv()
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define printf
Definition: papi_test.h:125
static int createNativeEvents(void)
Definition: linux-cuda.c:380
#define NUM_EVENTS
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
#define PAPI_ENOSUPP
Definition: fpapi.h:123
static CUcontext cuCtx
Definition: linux-cuda.h:156
static int currentDeviceID
Definition: linux-cuda.h:139
static int cidx
Definition: event_info.c:40
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
papi_vector_t _cuda_vector
Definition: linux-cuda.c:1151
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
Definition: linux-cuda.h:93
static int linkCudaLibraries()
Definition: linux-cuda.c:598

Here is the call graph for this function:

int CUDA_init_control_state ( hwd_control_state_t ctrl)

Definition at line 786 of file linux-cuda.c.

787 {
788  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
789  CUptiResult cuptiErr = CUPTI_SUCCESS;
790  int i;
791 
792  /* allocate memory for the list of events that are added to the CuPTI eventGroup */
793  CUDA_ctrl->addedEvents.list = malloc( sizeof ( int ) * NUM_EVENTS );
794  if ( CUDA_ctrl->addedEvents.list == NULL ) {
795  perror
796  ( "malloc(): Failed to allocate memory to table of events that are added to CuPTI eventGroup" );
797  return ( PAPI_ENOSUPP );
798  }
799 
800  /* initialize the event list */
801  for ( i = 0; i < NUM_EVENTS; i++ )
802  CUDA_ctrl->addedEvents.list[i] = 0;
803 
804 
805 
806  cuptiErr = (*cuptiEventGroupCreatePtr)( cuCtx, &CUDA_ctrl->eventGroup, 0 );
807  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupCreate" );
808 
809  return PAPI_OK;
810 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
return PAPI_OK
Definition: linux-nvml.c:458
#define NUM_EVENTS
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
static CUcontext cuCtx
Definition: linux-cuda.h:156
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_init_thread ( hwd_context_t ctx)

Definition at line 489 of file linux-cuda.c.

490 {
491  ( void ) ctx;
492 
493  return PAPI_OK;
494 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
int CUDA_ntv_code_to_bits ( unsigned int  EventCode,
hwd_register_t bits 
)

Definition at line 1136 of file linux-cuda.c.

1137 {
1138  int index = EventCode;
1139 
1140  memcpy( ( CUDA_register_t * ) bits,
1141  &( cuda_native_table[index].resources ),
1142  sizeof ( CUDA_register_t ) );
1143 
1144  return ( PAPI_OK );
1145 }
return PAPI_OK
Definition: linux-nvml.c:458
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
int CUDA_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
)

Definition at line 1123 of file linux-cuda.c.

1124 {
1125  int index = EventCode;
1126 
1127  strncpy( name, cuda_native_table[index].description, len );
1128  return ( PAPI_OK );
1129 }
return PAPI_OK
Definition: linux-nvml.c:458
char description[PAPI_MAX_STR_LEN]
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
char * name
Definition: iozone.c:23648
int CUDA_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Definition at line 1110 of file linux-cuda.c.

1111 {
1112  int index = EventCode;
1113 
1114  strncpy( name, cuda_native_table[index].name, len );
1115  return ( PAPI_OK );
1116 }
return PAPI_OK
Definition: linux-nvml.c:458
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
char * name
Definition: iozone.c:23648
int CUDA_ntv_enum_events ( unsigned int EventCode,
int  modifier 
)

Definition at line 1077 of file linux-cuda.c.

1078 {
1079 
1080  switch ( modifier ) {
1081  case PAPI_ENUM_FIRST:
1082  *EventCode = 0;
1083 
1084  return ( PAPI_OK );
1085  break;
1086 
1087  case PAPI_ENUM_EVENTS:
1088  {
1089  int index = *EventCode;
1090 
1091  if ( index < NUM_EVENTS - 1 ) {
1092  *EventCode = *EventCode + 1;
1093  return ( PAPI_OK );
1094  } else
1095  return ( PAPI_ENOEVNT );
1096 
1097  break;
1098  }
1099  default:
1100  return ( PAPI_EINVAL );
1101  }
1102  return ( PAPI_EINVAL );
1103 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
#define NUM_EVENTS
int CUDA_read ( hwd_context_t ctx,
hwd_control_state_t ctrl,
long_long **  events,
int  flags 
)

Definition at line 856 of file linux-cuda.c.

858 {
859  ( void ) ctx;
860  ( void ) flags;
861  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
862 
863 
864  if ( 0 != getEventValue( CUDA_ctrl->counts, CUDA_ctrl->eventGroup, CUDA_ctrl->addedEvents ) )
865  return ( PAPI_ENOSUPP );
866 
867  *events = CUDA_ctrl->counts;
868 
869  return ( PAPI_OK );
870 }
long long flags
Definition: iozone.c:12330
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define PAPI_ENOSUPP
Definition: fpapi.h:123
char events[MAX_EVENTS][BUFSIZ]
long long counts[CUDA_MAX_COUNTERS]
Definition: linux-cuda.h:111
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109
static int getEventValue(long long *counts, CUpti_EventGroup eventGroup, AddedEvents_t addedEvents)
Definition: linux-cuda.c:437

Here is the call graph for this function:

int CUDA_reset ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)

Definition at line 1033 of file linux-cuda.c.

1034 {
1035  ( void ) ctx;
1036  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
1037  CUptiResult cuptiErr = CUPTI_SUCCESS;
1038 
1039  /* Resets all events in the CuPTI eventGroup to zero */
1040  cuptiErr = (*cuptiEventGroupResetAllEventsPtr)( CUDA_ctrl->eventGroup );
1041  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupResetAllEvents" );
1042 
1043  return ( PAPI_OK );
1044 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_set_domain ( hwd_control_state_t cntrl,
int  domain 
)

Definition at line 1008 of file linux-cuda.c.

1009 {
1010  int found = 0;
1011  ( void ) cntrl;
1012 
1013  if ( PAPI_DOM_USER & domain )
1014  found = 1;
1015 
1016  if ( PAPI_DOM_KERNEL & domain )
1017  found = 1;
1018 
1019  if ( PAPI_DOM_OTHER & domain )
1020  found = 1;
1021 
1022  if ( !found )
1023  return ( PAPI_EINVAL );
1024 
1025  return ( PAPI_OK );
1026 }
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
long long found
Definition: libasync.c:735
#define PAPI_DOM_USER
Definition: fpapi.h:21
int CUDA_shutdown_component ( void  )

Definition at line 887 of file linux-cuda.c.

888 {
889  CUresult cuErr = CUDA_SUCCESS;
890 
891  /* if running a threaded application, we need to make sure that
892  a thread doesn't free the same memory location(s) more than once */
893  if ( CUDA_FREED == 0 ) {
894  uint32_t j;
895  int i;
896 
897  CUDA_FREED = 1;
898 
899  /* deallocate all the memory */
900  for ( i = 0; i < deviceCount; i++ ) {
901  for ( j = 0; j < device[i].domainCount; j++ )
902  free( device[i].domain[j].event );
903 
904  free( device[i].domain );
905  }
906 
907  free( device );
909 
910  /* destroy floating CUDA context */
911  cuErr = (*cuCtxDestroyPtr)( cuCtx );
912  if ( cuErr != CUDA_SUCCESS )
913  return ( PAPI_ENOSUPP ); // Not supported
914  }
915 
916  // close the dynamic libraries needed by this component (opened in the init substrate call)
917  dlclose(dl1);
918  dlclose(dl2);
919  dlclose(dl3);
920 
921  return ( PAPI_OK );
922 }
static int deviceCount
Definition: linux-cuda.h:136
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
return PAPI_OK
Definition: linux-nvml.c:458
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
static CUcontext cuCtx
Definition: linux-cuda.h:156
free(dummyfile[xx])
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
uint32_t domainCount
Definition: linux-cuda.h:70
static DeviceData_t * device
Definition: linux-cuda.h:155
long j
Definition: iozone.c:19135
static int CUDA_FREED
Definition: linux-cuda.h:140

Here is the call graph for this function:

int CUDA_shutdown_thread ( hwd_context_t ctx)

Definition at line 876 of file linux-cuda.c.

877 {
878  CUDA_context_t *CUDA_ctx = (CUDA_context_t*)ctx;
879  free( CUDA_ctx->state.addedEvents.list );
880  return (PAPI_OK);
881 }
CUDA_control_state_t state
Definition: linux-cuda.h:118
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
return PAPI_OK
Definition: linux-nvml.c:458
free(dummyfile[xx])

Here is the call graph for this function:

int CUDA_start ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)

Definition at line 817 of file linux-cuda.c.

818 {
819  ( void ) ctx;
820  int i;
821  CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
822  CUptiResult cuptiErr = CUPTI_SUCCESS;
823 
824  // reset all event values to 0
825  for ( i = 0; i < NUM_EVENTS; i++ )
826  CUDA_ctrl->counts[i] = 0;
827 
828  cuptiErr = (*cuptiEventGroupEnablePtr)( CUDA_ctrl->eventGroup );
829  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupEnable" );
830 
831  /* Resets all events in the CuPTI eventGroup to zero */
832  cuptiErr = (*cuptiEventGroupResetAllEventsPtr)( CUDA_ctrl->eventGroup );
833  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupResetAllEvents" );
834 
835  return ( PAPI_OK );
836 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define NUM_EVENTS
int i
Definition: fileop.c:140
long long counts[CUDA_MAX_COUNTERS]
Definition: linux-cuda.h:111
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

int CUDA_stop ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)

Definition at line 843 of file linux-cuda.c.

844 {
845  ( void ) ctx;
846  ( void ) ctrl;
847 
848  return ( PAPI_OK );
849 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
int CUDA_update_control_state ( hwd_control_state_t ptr,
NativeInfo_t native,
int  count,
hwd_context_t ctx 
)

Definition at line 946 of file linux-cuda.c.

949 {
950  ( void ) ctx;
951  CUDA_control_state_t * CUDA_ptr = ( CUDA_control_state_t * ) ptr;
952  int index, i;
953  CUptiResult cuptiErr = CUPTI_SUCCESS;
954 
955  /* Disable the CUDA eventGroup;
956  it also frees the perfmon hardware on the GPU */
957  cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ptr->eventGroup );
958  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );
959 
960  cuptiErr = (*cuptiEventGroupRemoveAllEventsPtr)( CUDA_ptr->eventGroup );
961  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupRemoveAllEvents" );
962 
963  // otherwise, add the events to the eventset
964  for ( i = 0; i < count; i++ ) {
965 
966  index = native[i].ni_event;
967  native[i].ni_position = index;
968 
969  /* store events that have been added to the CuPTI eventGroup
970  in a separate place (addedEvents).
971  Needed so that we can read the values for the added events only */
972  CUDA_ptr->addedEvents.count = count;
973  CUDA_ptr->addedEvents.list[i] = index;
974 
975  /* if this device name is different from the actual device the code is running on, then exit */
976  if ( 0 != strncmp( device[currentDeviceID].name,
977  cuda_native_table[index].name,
978  strlen( device[currentDeviceID].name ) ) ) {
979  fprintf( stderr, "Device %s is used -- BUT event %s is collected. \n ---> ERROR: Specify events for the device that is used!\n\n",
980  device[currentDeviceID].name, cuda_native_table[index].name );
981 
982  return ( PAPI_ENOSUPP ); // Not supported
983  }
984 
985  /* Add events to the CuPTI eventGroup */
986  cuptiErr =
987  (*cuptiEventGroupAddEventPtr)( CUDA_ptr->eventGroup,
989  eventId );
990  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupAddEvent" );
991  }
992 
993  return ( PAPI_OK );
994 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
void
Definition: iozone.c:18627
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
static int currentDeviceID
Definition: linux-cuda.h:139
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
CUpti_EventID * eventId
Definition: linux-cuda.c:291
CUDA_register_t resources
Definition: linux-cuda.h:95
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109

Here is the call graph for this function:

static int enumEventDomains ( CUdevice  dev,
int  deviceId 
)
static

Definition at line 185 of file linux-cuda.c.

186 {
187  CUptiResult err = CUPTI_SUCCESS;
188  CUpti_EventDomainID *domainId = NULL;
189  uint32_t id = 0;
190  size_t size = 0;
191 
192  device[deviceId].domainCount = 0;
193 
194  /* get number of domains for device dev */
195  err = (*cuptiDeviceGetNumEventDomainsPtr)( dev, &device[deviceId].domainCount );
196  CHECK_CUPTI_ERROR( err, "cuptiDeviceGetNumEventDomains" );
197 
198  if ( device[deviceId].domainCount == 0 ) {
199  printf( "No domain is exposed by dev = %d\n", dev );
200  return -1;
201  }
202 
203  /* CuPTI domain struct */
204  size = sizeof ( CUpti_EventDomainID ) * device[deviceId].domainCount;
205  domainId = ( CUpti_EventDomainID * ) malloc( size );
206  if ( domainId == NULL ) {
207  perror( "malloc(): Failed to allocate memory to CuPTI domain ID" );
208  return -1;
209  }
210  memset( domainId, 0, size );
211 
212  /* PAPI domain struct */
213  device[deviceId].domain =
214  ( DomainData_t * ) malloc( sizeof ( DomainData_t ) *
215  device[deviceId].domainCount );
216  if ( device[deviceId].domain == NULL ) {
217  perror( "malloc(): Failed to allocate memory to PAPI domain struct" );
218  free(domainId);
219  return -1;
220  }
221 
222  /* Enumerates the event domains for a device dev */
223  err = (*cuptiDeviceEnumEventDomainsPtr)( dev, &size, domainId );
224  CHECK_CUPTI_ERROR( err, "cuptiDeviceEnumEventDomains" );
225 
226  /* enum domains */
227  for ( id = 0; id < device[deviceId].domainCount; id++ ) {
228  device[deviceId].domain[id].domainId = domainId[id];
229 
230  /* query domain name */
231  size = PAPI_MIN_STR_LEN;
232 #ifdef CUDA_4_0
233  err = cuptiEventDomainGetAttribute( dev,
234  device[deviceId].domain[id].
235  domainId,
236  CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
237  ( void * ) device[deviceId].
238  domain[id].name );
239  CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );
240 
241  /* query the number of events available in the domain */
242  size = sizeof ( device[deviceId].domain[id].eventCount );
243  err = cuptiEventDomainGetAttribute( dev,
244  device[deviceId].domain[id].
245  domainId,
246  CUPTI_EVENT_DOMAIN_MAX_EVENTS,
247  &size,
248  ( void * ) &device[deviceId].
249  domain[id].eventCount );
250  CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );
251 
252  /* enumerate the events for the domain[id] on the device dev */
253  if ( 0 != enumEvents( dev, deviceId, id ) )
254  return -1;
255 #else
256  err = (*cuptiDeviceGetEventDomainAttributePtr)( dev,
257  device[deviceId].domain[id].domainId,
258  CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
259  ( void * ) device[deviceId].domain[id].name );
260  CHECK_CUPTI_ERROR( err, "cuptiDeviceGetEventDomainAttribute" );
261 
262  /* query the number of events available in the domain */
263  err = (*cuptiEventDomainGetNumEventsPtr)( device[deviceId].domain[id].domainId,
264  &device[deviceId].domain[id].eventCount );
265  CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetNumEvents" );
266 
267  /* enumerate the events for the domain[id] on the device deviceId */
268  if ( 0 != enumEvents( deviceId, id ) )
269  return -1;
270 #endif
271  }
272 
273  totalDomainCount += device[deviceId].domainCount;
274  free( domainId );
275  return 0;
276 }
memset(eventId, 0, size)
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
static int enumEvents(int domainId, int eventCount)
#define printf
Definition: papi_test.h:125
err
Definition: linux-cuda.c:323
char *long long size
Definition: iozone.c:12023
free(dummyfile[xx])
uint32_t eventCount
Definition: linux-cuda.h:61
uint32_t domainCount
Definition: linux-cuda.h:70
uint32_t id
Definition: linux-cuda.c:293
DomainData_t * domain
Definition: linux-cuda.h:71
static int totalDomainCount
Definition: linux-cuda.h:137
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
CUpti_EventDomainID domainId
Definition: linux-cuda.h:59

Here is the call graph for this function:

for ( id = 0; id < device[deviceId].domain[domainId].eventCount; id++ )

Definition at line 330 of file linux-cuda.c.

330  {
331  device[deviceId].domain[domainId].event[id].eventId = eventId[id];
332 
333  /* query event name */
335 #ifdef CUDA_4_0
336  err = (*cuptiEventGetAttributePtr)( dev,
337  device[deviceId].domain[domainId].
338  event[id].eventId, CUPTI_EVENT_ATTR_NAME,
339  &size,
340  ( uint8_t * ) device[deviceId].
341  domain[domainId].event[id].name );
342 #else
343  err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
344  event[id].eventId, CUPTI_EVENT_ATTR_NAME,
345  &size,
346  ( uint8_t * ) device[deviceId].
347  domain[domainId].event[id].name );
348 #endif
349  CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
350 
351  /* query event description */
353 #ifdef CUDA_4_0
354  err = (*cuptiEventGetAttributePtr)( dev,
355  device[deviceId].domain[domainId].
356  event[id].eventId,
357  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
358  ( uint8_t * ) device[deviceId].
359  domain[domainId].event[id].desc );
360 #else
361  err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
362  event[id].eventId,
363  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
364  ( uint8_t * ) device[deviceId].
365  domain[domainId].event[id].desc );
366 #endif
367  CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
368  }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
CUpti_EventID eventId
Definition: linux-cuda.h:51
err
Definition: linux-cuda.c:323
#define PAPI_2MAX_STR_LEN
Definition: papi.h:464
char *long long size
Definition: iozone.c:12023
EventData_t * event
Definition: linux-cuda.h:62
uint32_t id
Definition: linux-cuda.c:293
DomainData_t * domain
Definition: linux-cuda.h:71
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
CUpti_EventID * eventId
Definition: linux-cuda.c:291

Here is the call graph for this function:

free ( eventId  )
static int getEventValue ( long long counts,
CUpti_EventGroup  eventGroup,
AddedEvents_t  addedEvents 
)
static

Definition at line 437 of file linux-cuda.c.

438 {
439  CUptiResult cuptiErr = CUPTI_SUCCESS;
440  size_t events_read, bufferSizeBytes, arraySizeBytes, i;
441  uint64_t *counterDataBuffer;
442  CUpti_EventID *eventIDArray;
443  int j;
444 
445  bufferSizeBytes = addedEvents.count * sizeof ( uint64_t );
446  counterDataBuffer = ( uint64_t * ) malloc( bufferSizeBytes );
447 
448  arraySizeBytes = addedEvents.count * sizeof ( CUpti_EventID );
449  eventIDArray = ( CUpti_EventID * ) malloc( arraySizeBytes );
450 
451  /* read counter data for the specified event from the CuPTI eventGroup */
452  cuptiErr = (*cuptiEventGroupReadAllEventsPtr)( eventGroup,
453  CUPTI_EVENT_READ_FLAG_NONE,
454  &bufferSizeBytes,
455  counterDataBuffer, &arraySizeBytes,
456  eventIDArray, &events_read );
457  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupReadAllEvents" );
458 
459  if ( events_read != ( size_t ) addedEvents.count )
460  return -1;
461 
462  /* Since there is no guarantee that returned counter values are in the same
463  order as the counters in the PAPI addedEvents.list, we need to map the
464  CUpti_EventID to PAPI event ID values.
465  According to CuPTI doc: counter return values of counterDataBuffer
466  correspond to the return event IDs in eventIDArray */
467  for ( i = 0; i < events_read; i++ )
468  for ( j = 0; j < addedEvents.count; j++ )
469  if ( cuda_native_table[addedEvents.list[j]].resources.eventId ==
470  eventIDArray[i] )
471  // since cuptiEventGroupReadAllEvents() resets counter values to 0;
472  // we have to accumulate ourselves
473  counts[addedEvents.list[j]] = counts[addedEvents.list[j]] + counterDataBuffer[i];
474 
475  free( counterDataBuffer );
476  free( eventIDArray );
477  return 0;
478 }
CHECK_CUPTI_ERROR(err,"cuptiEventDomainEnumEvents")
CUpti_EventID eventId
Definition: linux-cuda.h:88
int i
Definition: fileop.c:140
free(dummyfile[xx])
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
long j
Definition: iozone.c:19135
CUDA_register_t resources
Definition: linux-cuda.h:95

Here is the call graph for this function:

Here is the caller graph for this function:

if ( eventId == NULL )

Definition at line 299 of file linux-cuda.c.

299  {
300  perror( "malloc(): Failed to allocate memory to CuPTI event ID" );
301  return -1;
302  }
if ( device[deviceId].domain[domainId].event == NULL )

Definition at line 310 of file linux-cuda.c.

310  {
311  perror( "malloc(): Failed to allocate memory to PAPI event struct" );
312  free(eventId);
313  return -1;
314  }
free(dummyfile[xx])
CUpti_EventID * eventId
Definition: linux-cuda.c:291

Here is the call graph for this function:

static int linkCudaLibraries ( )
static

Definition at line 598 of file linux-cuda.c.

599 {
600  /* Attempt to guess if we were statically linked to libc, if so bail */
601  if ( _dl_non_dynamic_init != NULL ) {
602  strncpy(_cuda_vector.cmp_info.disabled_reason, "The cuda component does not support statically linking to libc.",PAPI_MAX_STR_LEN);
603  return PAPI_ENOSUPP;
604  }
605  /* Need to link in the cuda libraries, if not found disable the component */
606  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
607  if (!dl1)
608  {
609  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
610  return ( PAPI_ENOSUPP );
611  }
612  cuCtxCreatePtr = dlsym(dl1, "cuCtxCreate_v2");
613  if (dlerror() != NULL)
614  {
615  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuCtxCreate not found.",PAPI_MAX_STR_LEN);
616  return ( PAPI_ENOSUPP );
617  }
618  cuCtxDestroyPtr = dlsym(dl1, "cuCtxDestroy_v2");
619  if (dlerror() != NULL)
620  {
621  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuCtxDestroy not found.",PAPI_MAX_STR_LEN);
622  return ( PAPI_ENOSUPP );
623  }
624  cuCtxGetCurrentPtr = dlsym(dl1, "cuCtxGetCurrent");
625  if (dlerror() != NULL)
626  {
627  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuCtxGetCurrent not found.",PAPI_MAX_STR_LEN);
628  return ( PAPI_ENOSUPP );
629  }
630  cuDeviceGetPtr = dlsym(dl1, "cuDeviceGet");
631  if (dlerror() != NULL)
632  {
633  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuDeviceGet not found.",PAPI_MAX_STR_LEN);
634  return ( PAPI_ENOSUPP );
635  }
636  cuDeviceGetCountPtr = dlsym(dl1, "cuDeviceGetCount");
637  if (dlerror() != NULL)
638  {
639  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuDeviceGetCount not found.",PAPI_MAX_STR_LEN);
640  return ( PAPI_ENOSUPP );
641  }
642  cuDeviceGetNamePtr = dlsym(dl1, "cuDeviceGetName");
643  if (dlerror() != NULL)
644  {
645  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuDeviceGetName not found.",PAPI_MAX_STR_LEN);
646  return ( PAPI_ENOSUPP );
647  }
648  cuInitPtr = dlsym(dl1, "cuInit");
649  if (dlerror() != NULL)
650  {
651  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
652  return ( PAPI_ENOSUPP );
653  }
654 
655  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
656  if (!dl2)
657  {
658  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
659  return ( PAPI_ENOSUPP );
660  }
661  cudaFreePtr = dlsym(dl2, "cudaFree");
662  if (dlerror() != NULL)
663  {
664  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaFree not found.",PAPI_MAX_STR_LEN);
665  return ( PAPI_ENOSUPP );
666  }
667  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
668  if (dlerror() != NULL)
669  {
670  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
671  return ( PAPI_ENOSUPP );
672  }
673  cudaRuntimeGetVersionPtr = dlsym(dl2, "cudaRuntimeGetVersion");
674  if (dlerror() != NULL)
675  {
676  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaRuntimeGetVersion not found.",PAPI_MAX_STR_LEN);
677  return ( PAPI_ENOSUPP );
678  }
679  cudaDriverGetVersionPtr = dlsym(dl2, "cudaDriverGetVersion");
680  if (dlerror() != NULL)
681  {
682  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDART function cudaDriverGetVersion not found.",PAPI_MAX_STR_LEN);
683  return ( PAPI_ENOSUPP );
684  }
685 
686  dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL);
687  if (!dl3)
688  {
689  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA runtime library libcupti.so not found.",PAPI_MAX_STR_LEN);
690  return ( PAPI_ENOSUPP );
691  }
692  cuptiDeviceEnumEventDomainsPtr = dlsym(dl3, "cuptiDeviceEnumEventDomains");
693  if (dlerror() != NULL)
694  {
695  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiDeviceEnumEventDomains not found.",PAPI_MAX_STR_LEN);
696  return ( PAPI_ENOSUPP );
697  }
698  cuptiDeviceGetEventDomainAttributePtr = dlsym(dl3, "cuptiDeviceGetEventDomainAttribute");
699  if (dlerror() != NULL)
700  {
701  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiDeviceGetEventDomainAttribute not found.",PAPI_MAX_STR_LEN);
702  return ( PAPI_ENOSUPP );
703  }
704  cuptiDeviceGetNumEventDomainsPtr = dlsym(dl3, "cuptiDeviceGetNumEventDomains");
705  if (dlerror() != NULL)
706  {
707  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiDeviceGetNumEventDomains not found.",PAPI_MAX_STR_LEN);
708  return ( PAPI_ENOSUPP );
709  }
710  cuptiEventDomainEnumEventsPtr = dlsym(dl3, "cuptiEventDomainEnumEvents");
711  if (dlerror() != NULL)
712  {
713  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventDomainEnumEvents not found.",PAPI_MAX_STR_LEN);
714  return ( PAPI_ENOSUPP );
715  }
716  cuptiEventDomainGetNumEventsPtr = dlsym(dl3, "cuptiEventDomainGetNumEvents");
717  if (dlerror() != NULL)
718  {
719  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventDomainGetNumEvents not found.",PAPI_MAX_STR_LEN);
720  return ( PAPI_ENOSUPP );
721  }
722  cuptiEventGetAttributePtr = dlsym(dl3, "cuptiEventGetAttribute");
723  if (dlerror() != NULL)
724  {
725  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGetAttribute not found.",PAPI_MAX_STR_LEN);
726  return ( PAPI_ENOSUPP );
727  }
728  cuptiEventGroupAddEventPtr = dlsym(dl3, "cuptiEventGroupAddEvent");
729  if (dlerror() != NULL)
730  {
731  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupAddEvent not found.",PAPI_MAX_STR_LEN);
732  return ( PAPI_ENOSUPP );
733  }
734  cuptiEventGroupCreatePtr = dlsym(dl3, "cuptiEventGroupCreate");
735  if (dlerror() != NULL)
736  {
737  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupCreate not found.",PAPI_MAX_STR_LEN);
738  return ( PAPI_ENOSUPP );
739  }
740  cuptiEventGroupDestroyPtr = dlsym(dl3, "cuptiEventGroupDestroy");
741  if (dlerror() != NULL)
742  {
743  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupDestroy not found.",PAPI_MAX_STR_LEN);
744  return ( PAPI_ENOSUPP );
745  }
746  cuptiEventGroupDisablePtr = dlsym(dl3, "cuptiEventGroupDisable");
747  if (dlerror() != NULL)
748  {
749  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupDisable not found.",PAPI_MAX_STR_LEN);
750  return ( PAPI_ENOSUPP );
751  }
752  cuptiEventGroupEnablePtr = dlsym(dl3, "cuptiEventGroupEnable");
753  if (dlerror() != NULL)
754  {
755  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupEnable not found.",PAPI_MAX_STR_LEN);
756  return ( PAPI_ENOSUPP );
757  }
758  cuptiEventGroupReadAllEventsPtr = dlsym(dl3, "cuptiEventGroupReadAllEvents");
759  if (dlerror() != NULL)
760  {
761  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupReadAllEvents not found.",PAPI_MAX_STR_LEN);
762  return ( PAPI_ENOSUPP );
763  }
764  cuptiEventGroupRemoveAllEventsPtr = dlsym(dl3, "cuptiEventGroupRemoveAllEvents");
765  if (dlerror() != NULL)
766  {
767  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupRemoveAllEvents not found.",PAPI_MAX_STR_LEN);
768  return ( PAPI_ENOSUPP );
769  }
770  cuptiEventGroupResetAllEventsPtr = dlsym(dl3, "cuptiEventGroupResetAllEvents");
771  if (dlerror() != NULL)
772  {
773  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUPTI function cuptiEventGroupResetAllEvents not found.",PAPI_MAX_STR_LEN);
774  return ( PAPI_ENOSUPP );
775  }
776 
777  return ( PAPI_OK );
778 }
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
#define PAPI_ENOSUPP
Definition: fpapi.h:123
papi_vector_t _cuda_vector
Definition: linux-cuda.c:1151

Here is the caller graph for this function:

memset ( eventId, 0, size )

Variable Documentation

papi_vector_t _cuda_vector

Definition at line 1151 of file linux-cuda.c.

void(* _dl_non_dynamic_init)(void)

Definition at line 41 of file linux-cuda.c.

122 {
123  CUresult err;
124  int skipDevice = 0;
125  int id;
126  char deviceName_tmp[PAPI_MIN_STR_LEN] = "init";
127 
128  totalEventCount = 0;
129 
130 /* CUDA initialization */
131  err = (*cuInitPtr)( 0 );
132  if ( err != CUDA_SUCCESS ) {
133  SUBDBG ("Info: Error from cuInit(): %d\n", err);
134  return ( PAPI_ENOSUPP );
135  }
136 
137  /* How many gpgpu devices do we have? */
138  err = (*cuDeviceGetCountPtr)( &deviceCount );
139  CHECK_CU_ERROR( err, "cuDeviceGetCount" );
140  if ( deviceCount == 0 )
141  return ( PAPI_ENOSUPP );
142 
143  /* allocate memory for device data table */
144  device = ( DeviceData_t * ) malloc( sizeof ( DeviceData_t ) * deviceCount );
145  if ( device == NULL ) {
146  perror( "malloc(): Failed to allocate memory to CUDA device table" );
147  return ( PAPI_ENOSUPP );
148  }
149 
150  /* What are the devices? Get Name and # of domains per device */
151  for ( id = 0; id < deviceCount; id++ ) {
152  err = (*cuDeviceGetPtr)( &device[id].dev, id );
153  CHECK_CU_ERROR( err, "cuDeviceGet" );
154 
155  err = (*cuDeviceGetNamePtr)( device[id].name, PAPI_MIN_STR_LEN, device[id].dev );
156  CHECK_CU_ERROR( err, "cuDeviceGetName" );
157 
158  SUBDBG ("Cuda deviceName: %s\n", device[id].name);
159 
160  /* Skip device if there are multiple of the same type
161  and if it has been already added to the list */
162  if ( 0 == strcmp( deviceName_tmp, device[id].name ) ) {
163  skipDevice++;
164  continue;
165  }
166 
167  strcpy( deviceName_tmp, device[id].name );
168 
169  /* enumerate the domains on the device */
170  if ( 0 != enumEventDomains( device[id].dev, id ) )
171  return ( PAPI_ENOSUPP );
172  }
173 
174  deviceCount = deviceCount - skipDevice;
175 
176  /* return number of events provided via CuPTI */
177  return totalEventCount;
178 }
CUdevice dev
Definition: linux-cuda.h:68
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.h:26
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
static int deviceCount
Definition: linux-cuda.h:136
totalEventCount
Definition: linux-cuda.c:370
err
Definition: linux-cuda.c:323
#define PAPI_ENOSUPP
Definition: fpapi.h:123
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
uint32_t id
Definition: linux-cuda.c:293
static int enumEventDomains(CUdevice dev, int deviceId)
Definition: linux-cuda.c:185
strcpy(filename, default_filename)
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
char name[PAPI_MIN_STR_LEN]
Definition: linux-cuda.h:69
err
Initial value:
=
(*cuptiEventDomainEnumEventsPtr)( ( CUpti_EventDomainID ) device[deviceId].
domain[domainId].domainId, &size, eventId )
char *long long size
Definition: iozone.c:12023
static DeviceData_t * device
Definition: linux-cuda.h:155
CUpti_EventID * eventId
Definition: linux-cuda.c:291

Definition at line 323 of file linux-cuda.c.

device[deviceId].domain[domainId].event
Initial value:
=
( EventData_t * ) malloc( sizeof ( EventData_t ) *
device[deviceId].domain[domainId].
eventCount )
DomainData_t * domain
Definition: linux-cuda.h:71
static DeviceData_t * device
Definition: linux-cuda.h:155

Definition at line 306 of file linux-cuda.c.

eventId = NULL

Definition at line 291 of file linux-cuda.c.

uint32_t id = 0

Definition at line 293 of file linux-cuda.c.

int
static
Initial value:
{
CUptiResult err = CUPTI_SUCCESS
err
Definition: linux-cuda.c:323

Definition at line 289 of file linux-cuda.c.

return

Definition at line 372 of file linux-cuda.c.

size = 0

Definition at line 292 of file linux-cuda.c.

totalEventCount = device[deviceId].domain[domainId].eventCount

Definition at line 370 of file linux-cuda.c.