PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-cuda.h
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
18 #ifndef _PAPI_CUDA_H
19 #define _PAPI_CUDA_H
20 
21 /* Headers required by CuPTI */
22 #include "cupti_events.h"
23 #include <cuda_runtime_api.h>
24 
/*
 * Specific errors from CUDA lib.
 *
 * err    - status value, compared against CUDA_SUCCESS
 * cufunc - string naming the driver API call, printed in the message
 *
 * On failure a message is printed and `return -1` is executed in the
 * enclosing function, so the macro may only be used inside a function whose
 * return type accepts -1.  `err` is expanded again in the printf on the
 * failure path, so pass a plain variable rather than a call expression.
 *
 * The body is wrapped in do { } while (0) so that
 * `CHECK_CU_ERROR(e, "f");` expands to exactly one statement and stays
 * correct inside an unbraced if/else.
 */
#define CHECK_CU_ERROR(err, cufunc) \
    do { \
        if ((err) != CUDA_SUCCESS) \
        { \
            printf ("Error %d for CUDA Driver API function '%s'. cuptiQuery failed\n", (err), (cufunc)); \
            return -1; \
        } \
    } while (0)
32 
/*
 * Specific errors from CuPTI lib.
 *
 * err       - status value, compared against CUPTI_SUCCESS
 * cuptifunc - string naming the CUPTI API call, printed in the message
 *
 * On failure a message is printed and `return -1` is executed in the
 * enclosing function, so the macro may only be used inside a function whose
 * return type accepts -1.  `err` is expanded again in the printf on the
 * failure path, so pass a plain variable rather than a call expression.
 *
 * The body is wrapped in do { } while (0) so that
 * `CHECK_CUPTI_ERROR(e, "f");` expands to exactly one statement and stays
 * correct inside an unbraced if/else.
 */
#define CHECK_CUPTI_ERROR(err, cuptifunc) \
    do { \
        if ((err) != CUPTI_SUCCESS) \
        { \
            printf ("Error %d for CUPTI API function '%s'. cuptiQuery failed\n", (err), (cuptifunc)); \
            return -1; \
        } \
    } while (0)
40 
41 
42 
43 /************************* DEFINES SECTION ***********************************
44  *******************************************************************************/
45 
/*
 * Upper bound on the number of counters; this number assumes that there
 * will never be more events than indicated.  It sizes the counts[] array
 * in CUDA_control_state.
 */
#define CUDA_MAX_COUNTERS 512
48 
49 typedef struct EventData
50 {
51  CUpti_EventID eventId; // CuPTI event id
52  char name[PAPI_MIN_STR_LEN]; // event name
53  char desc[PAPI_2MAX_STR_LEN]; // short desc of the event
54 } EventData_t;
55 
56 
57 typedef struct DomainData
58 {
59  CUpti_EventDomainID domainId; // CuPTI domain id
60  char name[PAPI_MIN_STR_LEN]; // domain name
61  uint32_t eventCount; // number of events per domain
63 } DomainData_t;
64 
65 
66 typedef struct DeviceData
67 {
68  CUdevice dev; // CUDA device
69  char name[PAPI_MIN_STR_LEN]; // device name
70  uint32_t domainCount; // number of domains per device
72 } DeviceData_t;
73 
74 
/* Bookkeeping for the events that have been added to the CuPTI eventGroup. */
typedef struct AddedEvents
{
    int count;  /* number of events that have been added to the CuPTI eventGroup */
    int *list;  /* list of the added events */
} AddedEvents_t;
80 
81 
83 typedef struct CUDA_register
84 {
85  /* This is used by the framework.It likes it to be !=0 to do somehting */
86  unsigned int selector;
87  /* This is the information needed to locate a CUDA event */
88  CUpti_EventID eventId;
90 
91 
93 typedef struct CUDA_native_event_entry
94 {
97  char description[PAPI_2MAX_STR_LEN];
99 
100 
101 typedef struct CUDA_reg_alloc
102 {
105 
106 
107 typedef struct CUDA_control_state
108 {
109  CUpti_EventGroup eventGroup;
111  long long counts[CUDA_MAX_COUNTERS];
112  int ncounter;
114 
115 /* Holds per-thread information */
116 typedef struct CUDA_context
117 {
120 
121 
122 /************************* GLOBALS SECTION ***********************************
123  *******************************************************************************/
124 
125 static int enumEventDomains( CUdevice dev, int deviceId );
126 #ifdef CUDA_4_0
127 static int enumEvents( CUdevice dev, int domainId, int eventCount );
128 #else
129 static int enumEvents( int domainId, int eventCount );
130 #endif
131 
132 /* This table contains the CUDA native events */
134 /* number of events in the table */
135 static int NUM_EVENTS = 0;
136 static int deviceCount = 0;
137 static int totalDomainCount = 0;
138 static int totalEventCount = 0;
139 static int currentDeviceID; /* determine the actual device the user code is running on */
140 static int CUDA_FREED = 0;
141 
142 /*
143  * Why are device and cuCtx globals?
144  *
145  * Starting in CUDA 4.0, multiple CPU threads can access the same CUDA context.
146  * This is a much easier programming model than pre-4.0 as threads - using the
147  * same context - can share memory, data, etc.
148  * It's possible to create a different context for each thread, but then we are
149  * likely running into a limitation that only one context can be profiled at a time.
150  * ==> and we don't want this. That's why CUDA context creation is done in
151  * CUDA_init_component() (called only by main thread) rather than CUDA_init_thread()
152  * or CUDA_init_control_state() (both called by each thread).
153  */
154 
156 static CUcontext cuCtx;
157 
158 #endif /* _PAPI_CUDA_H */
CUdevice dev
Definition: linux-cuda.h:68
CUDA_control_state_t state
Definition: linux-cuda.h:118
#define CUDA_MAX_COUNTERS
Definition: linux-cuda.h:47
static int enumEventDomains(CUdevice dev, int deviceId)
AddedEvents_t addedEvents
Definition: linux-cuda.h:110
static int enumEvents(int domainId, int eventCount)
static int deviceCount
Definition: linux-cuda.h:136
totalEventCount
Definition: linux-cuda.c:370
CUDA_register_t ra_bits
Definition: linux-cuda.h:103
CUpti_EventID eventId
Definition: linux-cuda.h:51
CUpti_EventID eventId
Definition: linux-cuda.h:88
#define PAPI_2MAX_STR_LEN
Definition: papi.h:464
static CUcontext cuCtx
Definition: linux-cuda.h:156
static int currentDeviceID
Definition: linux-cuda.h:139
uint32_t eventCount
Definition: linux-cuda.h:61
static CUDA_native_event_entry_t * cuda_native_table
Definition: linux-cuda.h:133
EventData_t * event
Definition: linux-cuda.h:62
uint32_t domainCount
Definition: linux-cuda.h:70
DomainData_t * domain
Definition: linux-cuda.h:71
static int totalDomainCount
Definition: linux-cuda.h:137
static DeviceData_t * device
Definition: linux-cuda.h:155
char * name
Definition: iozone.c:23648
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
Definition: linux-cuda.h:93
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
CUpti_EventDomainID domainId
Definition: linux-cuda.h:59
static int NUM_EVENTS
Definition: linux-cuda.h:135
CUDA_register_t resources
Definition: linux-cuda.h:95
CUpti_EventGroup eventGroup
Definition: linux-cuda.h:109
static int CUDA_FREED
Definition: linux-cuda.h:140
unsigned int selector
Definition: linux-cuda.h:86