/*
* File:    perf_event.c
*
* Author:  Corey Ashford
*          cjashfor@us.ibm.com
* - based upon perfmon.c written by -
*          Philip Mucci
*          mucci@cs.utk.edu
* Mods:    Gary Mohr
*          gary.mohr@bull.com
* Mods:    Vince Weaver
*          vweaver1@eecs.utk.edu
* Mods:    Philip Mucci
*          mucci@eecs.utk.edu
* Mods:    Gary Mohr
*          gary.mohr@bull.com
*          Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT
*          mode in libpfm4.  This adds several new event masks, including
*          cpu=, u=, and k=, which give the user the ability to set the
*          cpu number to use or to control the domain (user, kernel, or
*          both) in which the counter should be incremented.  These are
*          event masks, so it is now possible to have multiple events in
*          the same event set that count activity from different CPUs or
*          count activity in different domains.
*/
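/* For illustration, a sketch of the event-mask syntax described above
 * (the event name here is just an example; availability varies by CPU):
 *
 *     PAPI_add_named_event( EventSet, "PERF_COUNT_HW_CPU_CYCLES:u=1:k=0" );
 *     PAPI_add_named_event( EventSet, "PERF_COUNT_HW_CPU_CYCLES:u=0:k=1" );
 *
 * This would place two copies of the cycles event in one event set, one
 * counting only user-mode activity and the other only kernel-mode
 * activity.
 */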

#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <syscall.h>
#include <sys/utsname.h>
#include <sys/mman.h>
#include <sys/ioctl.h>

/* PAPI-specific includes */
#include "papi.h"
#include "papi_memory.h"
#include "papi_internal.h"
#include "papi_vector.h"
#include "extras.h"

/* libpfm4 includes */
#include "papi_libpfm4_events.h"
#include "pe_libpfm4_events.h"
#include "perfmon/pfmlib.h"
#include PEINCLUDE

/* Linux-specific includes */
#include "mb.h"
#include "linux-memory.h"
#include "linux-timer.h"
#include "linux-common.h"
#include "linux-context.h"

#include "perf_event_lib.h"
/* Defines for ctx->state */
#define PERF_EVENTS_OPENED  0x01
#define PERF_EVENTS_RUNNING 0x02

/* Static globals */
static int nmi_watchdog_active;

/* Forward declaration */
papi_vector_t _perf_event_vector;

/* Globals */
struct native_event_table_t perf_native_event_table;
static int our_cidx;

int
_pe_libpfm4_get_cidx( void )
{
	return our_cidx;
}

/* These sentinels tell _pe_set_overflow() how to set the */
/* wakeup_events field in the event descriptor record. */

#define WAKEUP_COUNTER_OVERFLOW 0
#define WAKEUP_PROFILING -1

#define WAKEUP_MODE_COUNTER_OVERFLOW 0
#define WAKEUP_MODE_PROFILING 1

/* The kernel developers say to never use a refresh value of 0 */
/* See https://lkml.org/lkml/2011/5/24/172 */
/* However, on some platforms (like Power) a value of 1 does not work */
/* We're still tracking down why this happens. */

#if defined(__powerpc__)
#define PAPI_REFRESH_VALUE 0
#else
#define PAPI_REFRESH_VALUE 1
#endif
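/* A sketch of how this value gets used when re-arming an overflow
 * (see the overflow/profiling code built on _pe_set_overflow()):
 *
 *     ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE );
 *
 * which asks the kernel to deliver that many more overflow
 * notifications before the event is disabled. */
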
static int _pe_set_domain( hwd_control_state_t *ctl, int domain);

/* Check for processor support */
/* Can be used for generic checking, though in general we only */
/* check for pentium4 here because support was broken for multiple */
/* kernel releases and the usual standard detections did not */
/* handle this.  So we check for pentium4 explicitly. */
static int
processor_supported(int vendor, int family) {

	/* Error out if the kernel is too old to support p4 */
	if (( vendor == PAPI_VENDOR_INTEL ) && (family == 15)) {
		if (_papi_os_info.os_version < LINUX_VERSION(2,6,35)) {
			PAPIERROR("Pentium 4 not supported on kernels before 2.6.35");
			return PAPI_ENOSUPP;
		}
	}
	return PAPI_OK;
}

/* Fix up the config based on what CPU/Vendor we are running on */
static int
pe_vendor_fixups( papi_vector_t *vector )
{
	/* powerpc */
	/* On IBM and Power6 machines the default domain should include supervisor */
	if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) {
		vector->cmp_info.available_domains |=
			PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
		if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) {
			vector->cmp_info.default_domain =
				PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
		}
	}

	/* MIPS */
	if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_MIPS ) {
		vector->cmp_info.available_domains |= PAPI_DOM_KERNEL;
	}

	/* Intel/AMD */
	if (( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) ||
	    ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD )) {
		vector->cmp_info.fast_real_timer = 1;
	}

	/* ARM */
	if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM ) {
		/* FIXME: this will change with Cortex A15 */
		vector->cmp_info.available_domains |=
			PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
		vector->cmp_info.default_domain =
			PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
	}

	/* CRAY */
	if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) {
		vector->cmp_info.available_domains |= PAPI_DOM_OTHER;
	}

	return PAPI_OK;
}


/******************************************************************/
/******** Kernel Version Dependent Routines  **********************/
/******************************************************************/

/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch
 * implementations (e.g. x86) which don't do a static event schedulability
 * check in sys_perf_event_open.
 * This was fixed for x86 in the 2.6.33 kernel.
 *
 * Also!  Kernels newer than 2.6.34 will fail in a similar way
 * if the nmi_watchdog has stolen a performance counter
 * and we try to use the maximum number of counters.
 * A sys_perf_event_open() will seem to succeed but will fail
 * at read time.  So re-use this work-around code.
 */
static int
bug_check_scheduability( void ) {

#if defined(__powerpc__)
	/* PowerPC not affected by this bug */
#elif defined(__mips__)
	/* MIPS as of kernel 3.1 does not properly detect schedulability */
	return 1;
#else
	if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
#endif

	if (nmi_watchdog_active) return 1;

	return 0;
}

/* PERF_FORMAT_GROUP allows reading an entire group's counts at once. */
/* Before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */
/* from attached processes.  We are lazy and disable it for all cases. */
/* commit was: 050735b08ca8a016bbace4445fa025b88fee770b */

static int
bug_format_group( void ) {

	if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) return 1;

	/* MIPS, as of version 3.1, does not support this properly */
#if defined(__mips__)
	return 1;
#endif

	return 0;
}
209 
210 /* There's a bug prior to Linux 2.6.33 where if you are using */
211 /* PERF_FORMAT_GROUP, the TOTAL_TIME_ENABLED and */
212 /* TOTAL_TIME_RUNNING fields will be zero unless you disable */
213 /* the counters first */
214 static int
216 
217  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
218 
219  return 0;
220 
221 }


/* Set the F_SETOWN_EX flag on the fd. */
/* This affects which thread an overflow signal gets sent to. */
/* Handled in a subroutine to handle the fact that the behavior */
/* is dependent on kernel version. */
static int
fcntl_setown_fd( int fd ) {

	int ret;
	struct f_owner_ex fown_ex;

	/* F_SETOWN_EX is not available until 2.6.32 */
	if (_papi_os_info.os_version < LINUX_VERSION(2,6,32)) {

		/* get ownership of the descriptor */
		ret = fcntl( fd, F_SETOWN, mygettid( ) );
		if ( ret == -1 ) {
			PAPIERROR( "cannot fcntl(F_SETOWN) on %d: %s",
				fd, strerror(errno) );
			return PAPI_ESYS;
		}
	}
	else {
		/* set ownership of the descriptor */
		fown_ex.type = F_OWNER_TID;
		fown_ex.pid  = mygettid();
		ret = fcntl(fd, F_SETOWN_EX, (unsigned long)&fown_ex );

		if ( ret == -1 ) {
			PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s",
				fd, strerror( errno ) );
			return PAPI_ESYS;
		}
	}
	return PAPI_OK;
}

/* The read format on perf_event varies based on the flags that */
/* are passed into it.  This helper avoids copying this logic */
/* to multiple places. */
static unsigned int
get_read_format( unsigned int multiplex,
		 unsigned int inherit,
		 int format_group )
{
	unsigned int format = 0;

	/* if we need read format options for multiplexing, add them now */
	if (multiplex) {
		format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
		format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
	}

	/* if our kernel supports it and we are not using inherit, */
	/* add the group read options */
	if ( (!bug_format_group()) && !inherit) {
		if (format_group) {
			format |= PERF_FORMAT_GROUP;
		}
	}

	SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n",
		multiplex, inherit, format_group, format);

	return format;
}
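/* For example (a sketch; the values follow from the logic above): a
 * multiplexed event gets
 *     PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING
 * while a non-multiplexed, non-inherited group leader on a kernel
 * without bug_format_group() gets PERF_FORMAT_GROUP, so a single
 * read() on the leader returns every group member's count. */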

/*****************************************************************/
/********* End Kernel-version Dependent Routines  ****************/
/*****************************************************************/

/*****************************************************************/
/********* Begin perf_event low-level code ***********************/
/*****************************************************************/

/* In case headers aren't new enough to have __NR_perf_event_open */
#ifndef __NR_perf_event_open

#ifdef __powerpc__
#define __NR_perf_event_open	319
#elif defined(__x86_64__)
#define __NR_perf_event_open	298
#elif defined(__i386__)
#define __NR_perf_event_open	336
#elif defined(__arm__)
#define __NR_perf_event_open	(366+0x900000)
#endif

#endif
static long
sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu,
		     int group_fd, unsigned long flags )
{
	int ret;

	SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, "
		"group_fd: %d, flags: %lx)\n",
		hw_event, pid, cpu, group_fd, flags);
	SUBDBG("   type: %d\n",hw_event->type);
	SUBDBG("   size: %d\n",hw_event->size);
	SUBDBG("   config: %"PRIx64" (%"PRIu64")\n",
		hw_event->config, hw_event->config);
	SUBDBG("   sample_period: %"PRIu64"\n",hw_event->sample_period);
	SUBDBG("   sample_type: %"PRIu64"\n",hw_event->sample_type);
	SUBDBG("   read_format: %"PRIu64"\n",hw_event->read_format);
	SUBDBG("   disabled: %d\n",hw_event->disabled);
	SUBDBG("   inherit: %d\n",hw_event->inherit);
	SUBDBG("   pinned: %d\n",hw_event->pinned);
	SUBDBG("   exclusive: %d\n",hw_event->exclusive);
	SUBDBG("   exclude_user: %d\n",hw_event->exclude_user);
	SUBDBG("   exclude_kernel: %d\n",hw_event->exclude_kernel);
	SUBDBG("   exclude_hv: %d\n",hw_event->exclude_hv);
	SUBDBG("   exclude_idle: %d\n",hw_event->exclude_idle);
	SUBDBG("   mmap: %d\n",hw_event->mmap);
	SUBDBG("   comm: %d\n",hw_event->comm);
	SUBDBG("   freq: %d\n",hw_event->freq);
	SUBDBG("   inherit_stat: %d\n",hw_event->inherit_stat);
	SUBDBG("   enable_on_exec: %d\n",hw_event->enable_on_exec);
	SUBDBG("   task: %d\n",hw_event->task);
	SUBDBG("   watermark: %d\n",hw_event->watermark);
	SUBDBG("   precise_ip: %d\n",hw_event->precise_ip);
	SUBDBG("   mmap_data: %d\n",hw_event->mmap_data);
	SUBDBG("   sample_id_all: %d\n",hw_event->sample_id_all);
	SUBDBG("   exclude_host: %d\n",hw_event->exclude_host);
	SUBDBG("   exclude_guest: %d\n",hw_event->exclude_guest);
	SUBDBG("   exclude_callchain_kernel: %d\n",
		hw_event->exclude_callchain_kernel);
	SUBDBG("   exclude_callchain_user: %d\n",
		hw_event->exclude_callchain_user);
	SUBDBG("   wakeup_events: %"PRIx32" (%"PRIu32")\n",
		hw_event->wakeup_events, hw_event->wakeup_events);
	SUBDBG("   bp_type: %"PRIx32" (%"PRIu32")\n",
		hw_event->bp_type, hw_event->bp_type);
	SUBDBG("   config1: %"PRIx64" (%"PRIu64")\n",
		hw_event->config1, hw_event->config1);
	SUBDBG("   config2: %"PRIx64" (%"PRIu64")\n",
		hw_event->config2, hw_event->config2);
	SUBDBG("   branch_sample_type: %"PRIx64" (%"PRIu64")\n",
		hw_event->branch_sample_type, hw_event->branch_sample_type);
	SUBDBG("   sample_regs_user: %"PRIx64" (%"PRIu64")\n",
		hw_event->sample_regs_user, hw_event->sample_regs_user);
	SUBDBG("   sample_stack_user: %"PRIx32" (%"PRIu32")\n",
		hw_event->sample_stack_user, hw_event->sample_stack_user);

	ret = syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
	SUBDBG("Returned %d %d %s\n", ret,
		ret < 0 ? errno : 0,
		ret < 0 ? strerror(errno) : " ");
	return ret;
}


static int map_perf_event_errors_to_papi(int perf_event_error) {

	int ret;

	/* These mappings are approximate.
	   EINVAL in particular can mean lots of different things */
	switch(perf_event_error) {
		case EPERM:
		case EACCES:
			ret = PAPI_EPERM;
			break;
		case ENODEV:
		case EOPNOTSUPP:
			ret = PAPI_ENOSUPP;
			break;
		case ENOENT:
			ret = PAPI_ENOEVNT;
			break;
		case ENOSYS:
		case EAGAIN:
		case EBUSY:
		case E2BIG:	/* Only happens if attr is the wrong size somehow */
		case EBADF:	/* We are attempting to group with an invalid file descriptor */
			ret = PAPI_ESYS;
			break;
		case ENOMEM:
			ret = PAPI_ENOMEM;
			break;
		case EMFILE:	/* Out of file descriptors.  Typically max out at 1024 */
			ret = PAPI_ECOUNT;
			break;
		case EINVAL:
		default:
			ret = PAPI_EINVAL;
			break;
	}
	return ret;
}


/* Check if the current set of options is supported by */
/* perf_events. */
/* We do this by temporarily opening an event with the */
/* desired options then closing it again.  We use the */
/* PERF_COUNT_HW_INSTRUCTIONS event as a dummy event */
/* on the assumption it is available on all */
/* platforms. */

static int
check_permissions( unsigned long tid,
		   unsigned int cpu_num,
		   unsigned int domain,
		   unsigned int granularity,
		   unsigned int multiplex,
		   unsigned int inherit )
{
	int ev_fd;
	struct perf_event_attr attr;

	long pid;

	/* clearing this sets the type to hardware and counts all domains */
	memset(&attr, '\0', sizeof(attr));
	attr.read_format = get_read_format(multiplex, inherit, 1);

	/* set the event id (config field) to instructions */
	/* (an event that should always exist) */
	/* This was cycles but that is missing on Niagara */
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;

	/* now set up the domains this event set will be counting */
	if (!(domain & PAPI_DOM_SUPERVISOR)) {
		attr.exclude_hv = 1;
	}
	if (!(domain & PAPI_DOM_USER)) {
		attr.exclude_user = 1;
	}
	if (!(domain & PAPI_DOM_KERNEL)) {
		attr.exclude_kernel = 1;
	}

	if (granularity==PAPI_GRN_SYS) {
		pid = -1;
	} else {
		pid = tid;
	}

	SUBDBG("Calling sys_perf_event_open() from check_permissions\n");

	ev_fd = sys_perf_event_open( &attr, pid, cpu_num, -1, 0 );
	if ( ev_fd == -1 ) {
		SUBDBG("sys_perf_event_open returned error.  Linux says, %s",
			strerror( errno ) );
		return map_perf_event_errors_to_papi( errno );
	}

	/* now close it, this was just to make sure we have permissions */
	/* to set these options */
	close(ev_fd);
	return PAPI_OK;
}

/* Maximum size we ever expect to read from a perf_event fd */
/* (this is the number of 64-bit values) */
/* We use this to size the read buffers */
/* The 3 is for the event count, time_enabled, and time_running; */
/* the remaining term is a count value and a count id for each */
/* possible counter. */
#define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))

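/* A sketch of the worst-case read() layout this buffer must hold
 * (PERF_FORMAT_GROUP plus both time fields; see get_read_format()):
 *
 *     u64 nr;                                  // number of events
 *     u64 time_enabled;                        // TOTAL_TIME_ENABLED
 *     u64 time_running;                        // TOTAL_TIME_RUNNING
 *     struct { u64 value; u64 id; } cntr[nr];  // id if PERF_FORMAT_ID
 *
 * hence 3 + 2 * PERF_EVENT_MAX_MPX_COUNTERS 64-bit values. */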


/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */
/* implementations (e.g. x86 before 2.6.33) which don't do a static event */
/* schedulability check in sys_perf_event_open.  It is also needed if the */
/* kernel is stealing an event, such as when the NMI watchdog is enabled. */

static int
check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
{
	int retval = 0, cnt = -1;
	( void ) ctx;	/*unused */
	long long papi_pe_buffer[READ_BUFFER_SIZE];
	int i, group_leader_fd;

	if (bug_check_scheduability()) {

		/* If the kernel isn't tracking schedulability right, */
		/* then we need to start/stop/read to force the event */
		/* to be scheduled and see if an error condition happens. */

		/* get the proper fd to start */
		group_leader_fd = ctl->events[idx].group_leader_fd;
		if (group_leader_fd == -1) group_leader_fd = ctl->events[idx].event_fd;

		/* start the event */
		retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
		if (retval == -1) {
			PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
			return PAPI_ESYS;
		}

		/* stop the event */
		retval = ioctl( group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
		if (retval == -1) {
			PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
			return PAPI_ESYS;
		}

		/* See if a read returns any results */
		cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
		if ( cnt == -1 ) {
			SUBDBG( "read returned an error!  Should never happen.\n" );
			return PAPI_ESYS;
		}

		if ( cnt == 0 ) {
			/* We read 0 bytes if we could not schedule the event. */
			/* The kernel should have detected this at open, */
			/* but various bugs (including the NMI watchdog) */
			/* result in this behavior. */

			return PAPI_ECNFLCT;

		} else {

			/* Reset all of the counters (opened so far) back to zero */
			/* from the above brief enable/disable call pair. */

			/* We have to reset all events because a reset of the */
			/* group leader does not reset all. */
			/* We assume that events are being added one by one and */
			/* that we do not need to reset higher events (doing so */
			/* may reset ones that have not been initialized yet). */

			/* Note... PERF_EVENT_IOC_RESET does not reset time running */
			/* info if multiplexing, so we should avoid coming here if */
			/* we are multiplexing the event. */
			for( i = 0; i < idx; i++) {
				retval = ioctl( ctl->events[i].event_fd,
						PERF_EVENT_IOC_RESET, NULL );
				if (retval == -1) {
					PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
						"(fd %d) failed",
						i, ctl->num_events, idx,
						ctl->events[i].event_fd);
					return PAPI_ESYS;
				}
			}
		}
	}
	return PAPI_OK;
}


/* Do some extra work on a perf_event fd if we're doing sampling. */
/* This mostly means setting up the mmap buffer. */
static int
tune_up_fd( pe_control_t *ctl, int evt_idx )
{
	int ret;
	void *buf_addr;
	int fd = ctl->events[evt_idx].event_fd;

	/* Register that we would like a SIGIO notification when a mmap'd page */
	/* becomes full. */
	ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK );
	if ( ret ) {
		PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) "
			"returned error: %s", fd, strerror( errno ) );
		return PAPI_ESYS;
	}

	/* Set the F_SETOWN_EX flag on the fd. */
	/* This affects which thread an overflow signal gets sent to. */
	ret = fcntl_setown_fd(fd);
	if (ret != PAPI_OK) return ret;

	/* Set FD_CLOEXEC.  Otherwise if we do an exec with an overflow */
	/* running, the overflow handler will continue into the exec()'d */
	/* process and kill it because no signal handler is set up. */
	ret = fcntl(fd, F_SETFD, FD_CLOEXEC);
	if (ret) {
		return PAPI_ESYS;
	}

	/* When you explicitly declare that you want a particular signal, */
	/* even when you use the default signal, the kernel will send more */
	/* information concerning the event to the signal handler. */
	/*                                                                 */
	/* In particular, it will send the file descriptor from which the  */
	/* event is originating, which can be quite useful when monitoring */
	/* multiple tasks from a single thread. */
	ret = fcntl( fd, F_SETSIG, ctl->overflow_signal );
	if ( ret == -1 ) {
		PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
			ctl->overflow_signal, fd,
			strerror( errno ) );
		return PAPI_ESYS;
	}

	/* mmap() the sample buffer */
	buf_addr = mmap( NULL, ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
	if ( buf_addr == MAP_FAILED ) {
		PAPIERROR( "mmap(NULL,%d,%d,%d,%d,0): %s",
			ctl->events[evt_idx].nr_mmap_pages * getpagesize( ),
			PROT_READ, MAP_SHARED, fd, strerror( errno ) );
		return ( PAPI_ESYS );
	}

	SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr );

	/* Set up the mmap buffer and its associated helpers */
	ctl->events[evt_idx].mmap_buf = (struct perf_counter_mmap_page *) buf_addr;
	ctl->events[evt_idx].tail = 0;
	ctl->events[evt_idx].mask =
		( ctl->events[evt_idx].nr_mmap_pages - 1 ) * getpagesize() - 1;

	return PAPI_OK;
}
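/* Note on the mask set above: the kernel requires the data area of the
 * mmap buffer to be a power of two pages (so nr_mmap_pages is expected
 * to be 2^n data pages plus the one header page), which makes
 * (nr_mmap_pages - 1) * getpagesize() - 1 a power-of-two-minus-one
 * bitmask.  For example (a sketch): with 4 KB pages and nr_mmap_pages
 * = 9 the data area is 32768 bytes and the mask is 0x7fff. */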


/* Open all events in the control state */
static int
open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
{
	int i, ret = PAPI_OK;
	long pid;

	if (ctl->granularity==PAPI_GRN_SYS) {
		pid = -1;
	}
	else {
		pid = ctl->tid;
	}

	for( i = 0; i < ctl->num_events; i++ ) {

		ctl->events[i].event_opened=0;

		/* set up the attr structure.  We don't set up all fields here */
		/* as some have already been set up previously. */

		/* group leader (event 0) is special */
		/* If we're multiplexed, everyone is a group leader */
		if (( i == 0 ) || (ctl->multiplexed)) {
			ctl->events[i].attr.pinned = !ctl->multiplexed;
			ctl->events[i].attr.disabled = 1;
			ctl->events[i].group_leader_fd=-1;
			ctl->events[i].attr.read_format = get_read_format(
							ctl->multiplexed,
							ctl->inherit,
							!ctl->multiplexed );
		} else {
			ctl->events[i].attr.pinned=0;
			ctl->events[i].attr.disabled = 0;
			ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
			ctl->events[i].attr.read_format = get_read_format(
							ctl->multiplexed,
							ctl->inherit,
							0 );
		}

		/* try to open */
		ctl->events[i].event_fd = sys_perf_event_open(
						&ctl->events[i].attr,
						pid,
						ctl->events[i].cpu,
						ctl->events[i].group_leader_fd,
						0 /* flags */ );

		/* Try to match Linux errors to PAPI errors */
		if ( ctl->events[i].event_fd == -1 ) {
			SUBDBG("sys_perf_event_open returned error on event #%d."
				"  Error: %s\n",
				i, strerror( errno ) );
			ret = map_perf_event_errors_to_papi( errno );

			goto open_pe_cleanup;
		}

		SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
			" group_leader/fd: %d, event_fd: %d,"
			" read_format: %"PRIu64"\n",
			pid, ctl->events[i].cpu, ctl->events[i].group_leader_fd,
			ctl->events[i].event_fd, ctl->events[i].attr.read_format);

		/* in many situations the kernel will indicate we opened fine */
		/* yet things will fail later.  So we need to double check */
		/* that we actually can use the events we've set up. */

		/* This is not necessary if we are multiplexing, and in fact */
		/* we cannot do this properly if multiplexed because */
		/* PERF_EVENT_IOC_RESET does not reset the time running info */
		if (!ctl->multiplexed) {
			ret = check_scheduability( ctx, ctl, i );

			if ( ret != PAPI_OK ) {
				/* the last event did open, so we need to bump */
				/* the counter before doing the cleanup */
				i++;
				goto open_pe_cleanup;
			}
		}
		ctl->events[i].event_opened=1;
	}

	/* Now that we've successfully opened all of the events, do whatever */
	/* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
	/* and so on. */
	for ( i = 0; i < ctl->num_events; i++ ) {

		/* If sampling is enabled, hook up the signal handler */
		if ((ctl->events[i].attr.sample_period) &&
		    (ctl->events[i].nr_mmap_pages > 0)) {
			ret = tune_up_fd( ctl, i );
			if ( ret != PAPI_OK ) {
				/* All of the fds are open, so we need to */
				/* clean up all of them */
				i = ctl->num_events;
				goto open_pe_cleanup;
			}
		} else {
			/* Make sure this is NULL so close_pe_events works right */
			ctl->events[i].mmap_buf = NULL;
		}
	}

	/* Mark the events opened only if completely successful */
	ctx->state |= PERF_EVENTS_OPENED;

	return PAPI_OK;

open_pe_cleanup:
	/* We encountered an error; close up the fds we successfully opened. */
	/* We go backward in an attempt to close group leaders last, although */
	/* that's probably not strictly necessary. */
	while ( i > 0 ) {
		i--;
		if (ctl->events[i].event_fd>=0) {
			close( ctl->events[i].event_fd );
			ctl->events[i].event_opened=0;
		}
	}

	return ret;
}

/* Close all of the opened events */
static int
close_pe_events( pe_context_t *ctx, pe_control_t *ctl )
{
	int i;
	int num_closed=0;
	int events_not_opened=0;

	/* should this be a more serious error? */
	if ( ctx->state & PERF_EVENTS_RUNNING ) {
		SUBDBG("Closing without stopping first\n");
	}

	/* Close child events first */
	for( i=0; i<ctl->num_events; i++ ) {

		if (ctl->events[i].event_opened) {

			if (ctl->events[i].group_leader_fd!=-1) {
				if ( ctl->events[i].mmap_buf ) {
					if ( munmap ( ctl->events[i].mmap_buf,
							ctl->events[i].nr_mmap_pages *
							getpagesize() ) ) {
						PAPIERROR( "munmap of fd = %d returned "
							"error: %s",
							ctl->events[i].event_fd,
							strerror( errno ) );
						return PAPI_ESYS;
					}
				}

				if ( close( ctl->events[i].event_fd ) ) {
					PAPIERROR( "close of fd = %d returned error: %s",
						ctl->events[i].event_fd,
						strerror( errno ) );
					return PAPI_ESYS;
				} else {
					num_closed++;
				}
				ctl->events[i].event_opened=0;
			}
		}
		else {
			events_not_opened++;
		}
	}

	/* Close the group leaders last */
	for( i=0; i<ctl->num_events; i++ ) {

		if (ctl->events[i].event_opened) {

			if (ctl->events[i].group_leader_fd==-1) {
				if ( ctl->events[i].mmap_buf ) {
					if ( munmap ( ctl->events[i].mmap_buf,
							ctl->events[i].nr_mmap_pages *
							getpagesize() ) ) {
						PAPIERROR( "munmap of fd = %d returned "
							"error: %s",
							ctl->events[i].event_fd,
							strerror( errno ) );
						return PAPI_ESYS;
					}
				}

				if ( close( ctl->events[i].event_fd ) ) {
					PAPIERROR( "close of fd = %d returned error: %s",
						ctl->events[i].event_fd,
						strerror( errno ) );
					return PAPI_ESYS;
				} else {
					num_closed++;
				}
				ctl->events[i].event_opened=0;
			}
		}
	}

	if (ctl->num_events!=num_closed) {
		if (ctl->num_events!=(num_closed+events_not_opened)) {
			PAPIERROR("Didn't close all events: "
				"Closed %d Not Opened: %d Expected %d",
				num_closed, events_not_opened, ctl->num_events);
			return PAPI_EBUG;
		}
	}

	ctl->num_events=0;

	ctx->state &= ~PERF_EVENTS_OPENED;

	return PAPI_OK;
}


/********************************************************************/
/********************************************************************/
/*    Functions that are exported via the component interface       */
/********************************************************************/
/********************************************************************/


/* Set the domain.  perf_events allows per-event control of this; PAPI */
/* allows it to be set at the event level or at the event set level. */
/* This sets the event set level domain values, but they only get used */
/* if no event level domain mask (u= or k=) was specified. */
static int
_pe_set_domain( hwd_control_state_t *ctl, int domain )
{
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain, domain);
	pe_ctl->domain = domain;
	return PAPI_OK;
}
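/* For illustration, the event-set-level path into this function
 * (a sketch; which PAPI_DOM_* bits are usable varies by platform):
 *
 *     PAPI_set_domain( PAPI_DOM_USER | PAPI_DOM_KERNEL );
 *
 * Per-event u=/k= masks, when given, override these settings (see
 * _pe_update_control_state()). */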

/* Shutdown a thread */
int
_pe_shutdown_thread( hwd_context_t *ctx )
{
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;

	pe_ctx->initialized=0;

	return PAPI_OK;
}


/* reset the hardware counters */
/* Note: PAPI_reset() does not necessarily call this */
/* unless the events are actually running. */
int
_pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
{
	int i, ret;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	( void ) ctx;	/*unused */

	/* We need to reset all of the events, not just the group leaders */
	for( i = 0; i < pe_ctl->num_events; i++ ) {
		ret = ioctl( pe_ctl->events[i].event_fd,
				PERF_EVENT_IOC_RESET, NULL );
		if ( ret == -1 ) {
			PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
				"returned error, Linux says: %s",
				pe_ctl->events[i].event_fd, strerror( errno ) );
			return PAPI_ESYS;
		}
	}

	return PAPI_OK;
}


/* write (set) the hardware counters */
/* Currently we do not support this. */
int
_pe_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
	   long long *from )
{
	( void ) ctx;	/*unused */
	( void ) ctl;	/*unused */
	( void ) from;	/*unused */
	/*
	 * Counters cannot be written.  Do we need to virtualize the
	 * counters so that they can be written, or perhaps modify the code
	 * so that they can be written?  FIXME ?
	 */

	return PAPI_ENOSUPP;
}

/*
 * perf_event provides a complicated read interface.
 * The info returned by read() varies depending on whether
 * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
 * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set.
 *
 * To simplify things we just always ask for everything.  This might
 * lead to overhead when reading more than we need, but it makes the
 * read code a lot simpler than the original implementation we had here.
 *
 * For more info on the layout see include/linux/perf_event.h
 *
 */

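/* When multiplexed, the counts read below are scaled as
 *
 *     count = value * time_enabled / time_running
 *
 * For example (a sketch): if an event was scheduled on the PMU for
 * only half of the time it was enabled (time_running = time_enabled
 * / 2), its raw value is doubled to estimate the full-interval count. */
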
int
_pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
	  long long **events, int flags )
{
	SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
		ctx, ctl, events, flags);

	( void ) flags;	/*unused */
	int i, ret = -1;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
	long long papi_pe_buffer[READ_BUFFER_SIZE];
	long long tot_time_running, tot_time_enabled, scale;

	/* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and */
	/* TOTAL_TIME_RUNNING fields are always 0 unless the counter */
	/* is disabled.  So if we are on one of these kernels, then we */
	/* must disable events before reading. */

	/* Elsewhere, though, we disable multiplexing on kernels */
	/* before 2.6.34, so maybe this isn't even necessary. */

	if (bug_sync_read()) {
		if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
			for ( i = 0; i < pe_ctl->num_events; i++ ) {
				/* disable only the group leaders */
				if ( pe_ctl->events[i].group_leader_fd == -1 ) {
					ret = ioctl( pe_ctl->events[i].event_fd,
						PERF_EVENT_IOC_DISABLE, NULL );
					if ( ret == -1 ) {
						PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) "
							"returned an error: %s",
							strerror( errno ));
						return PAPI_ESYS;
					}
				}
			}
		}
	}


	/* Handle the case where we are multiplexing */
	if (pe_ctl->multiplexed) {

		/* currently we handle multiplexing by having individual events */
		/* so we read from each in turn. */

		for ( i = 0; i < pe_ctl->num_events; i++ ) {

			ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
					sizeof ( papi_pe_buffer ) );
			if ( ret == -1 ) {
				PAPIERROR("read returned an error: %s",
					strerror( errno ));
				return PAPI_ESYS;
			}

			/* We should read 3 64-bit values from the counter */
			if (ret<(signed)(3*sizeof(long long))) {
				PAPIERROR("Error!  short read");
				return PAPI_ESYS;
			}

			SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
				pe_ctl->events[i].event_fd,
				(long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
			SUBDBG("read: %lld %lld %lld\n", papi_pe_buffer[0],
				papi_pe_buffer[1], papi_pe_buffer[2]);

			tot_time_enabled = papi_pe_buffer[1];
			tot_time_running = papi_pe_buffer[2];

			SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
				"tot_time_enabled %lld) / tot_time_running %lld\n",
				i, 0, papi_pe_buffer[0],
				tot_time_enabled, tot_time_running);

			if (tot_time_running == tot_time_enabled) {
				/* No scaling needed */
				pe_ctl->counts[i] = papi_pe_buffer[0];
			} else if (tot_time_running && tot_time_enabled) {
				/* Scale factor of 100 to avoid overflows */
				/* when computing enabled/running */
				scale = (tot_time_enabled * 100LL) / tot_time_running;
				scale = scale * papi_pe_buffer[0];
				scale = scale / 100LL;
				pe_ctl->counts[i] = scale;
			} else {
				/* This should not happen, but Phil reports it */
				/* sometimes does. */
				SUBDBG("perf_event kernel bug(?) count, enabled, "
					"running: %lld, %lld, %lld\n",
					papi_pe_buffer[0], tot_time_enabled,
					tot_time_running);

				pe_ctl->counts[i] = papi_pe_buffer[0];
			}
		}
	}

	/* Handle cases where we cannot use FORMAT_GROUP */
	else if (bug_format_group() || pe_ctl->inherit) {

		/* we must read each counter individually */
		for ( i = 0; i < pe_ctl->num_events; i++ ) {

			ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
					sizeof ( papi_pe_buffer ) );
			if ( ret == -1 ) {
				PAPIERROR("read returned an error: %s",
					strerror( errno ));
				return PAPI_ESYS;
			}

			/* we should read one 64-bit value from each counter */
			if (ret!=sizeof(long long)) {
				PAPIERROR("Error!  short read");
				PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d",
					pe_ctl->events[i].event_fd,
					(long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
				return PAPI_ESYS;
			}

			SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
				pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
				pe_ctl->events[i].cpu, ret);
			SUBDBG("read: %lld\n", papi_pe_buffer[0]);

			pe_ctl->counts[i] = papi_pe_buffer[0];
		}
	}


	/* Handle cases where we are using FORMAT_GROUP */
	/* We assume only one group leader, in position 0 */

	else {
		if (pe_ctl->events[0].group_leader_fd!=-1) {
			PAPIERROR("Was expecting group leader");
		}

		ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer,
				sizeof ( papi_pe_buffer ) );

		if ( ret == -1 ) {
			PAPIERROR("read returned an error: %s", strerror( errno ));
			return PAPI_ESYS;
		}

		/* we read 1 64-bit value (number of events) then */
		/* num_events more 64-bit values that hold the counts */
		if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
			PAPIERROR("Error!  short read");
			return PAPI_ESYS;
		}

		SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
			pe_ctl->events[0].event_fd,
			(long)pe_ctl->tid, pe_ctl->events[0].cpu, ret);
		{
			int j;
			for(j=0;j<ret/8;j++) {
				SUBDBG("read %d: %lld\n", j, papi_pe_buffer[j]);
			}
		}

		/* Make sure the kernel agrees with how many events we have */
		if (papi_pe_buffer[0]!=pe_ctl->num_events) {
			PAPIERROR("Error!  Wrong number of events");
			return PAPI_ESYS;
		}

		/* put the count values in their proper locations */
		for(i=0;i<pe_ctl->num_events;i++) {
			pe_ctl->counts[i] = papi_pe_buffer[1+i];
		}
	}


	/* If we disabled the counters due to bug_sync_read(), */
	/* then we need to re-enable them now. */
	if (bug_sync_read()) {
		if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
			for ( i = 0; i < pe_ctl->num_events; i++ ) {
				if ( pe_ctl->events[i].group_leader_fd == -1 ) {
					/* this should refresh any overflow counters too */
					ret = ioctl( pe_ctl->events[i].event_fd,
						PERF_EVENT_IOC_ENABLE, NULL );
					if ( ret == -1 ) {
						/* Should never happen */
						PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) "
							"returned an error: %s",
							strerror( errno ));
						return PAPI_ESYS;
					}
				}
			}
		}
	}

	/* point PAPI to the values we read */
	*events = pe_ctl->counts;

	SUBDBG("EXIT: *events: %p\n", *events);
	return PAPI_OK;
}

/* Start counting events */
int
_pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
{
	int ret;
	int i;
	int did_something = 0;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* Reset the counters first.  Is this necessary? */
	ret = _pe_reset( pe_ctx, pe_ctl );
	if ( ret ) {
		return ret;
	}

	/* Enable all of the group leaders */
	/* All group leaders have a group_leader_fd of -1 */
	for( i = 0; i < pe_ctl->num_events; i++ ) {
		if (pe_ctl->events[i].group_leader_fd == -1) {
			SUBDBG("ioctl(enable): fd: %d\n",
				pe_ctl->events[i].event_fd);
			ret = ioctl( pe_ctl->events[i].event_fd,
					PERF_EVENT_IOC_ENABLE, NULL );

			/* ioctls always return -1 on failure */
			if (ret == -1) {
				PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
				return PAPI_ESYS;
			}

			did_something++;
		}
	}

	if (!did_something) {
		PAPIERROR("Did not enable any counters");
		return PAPI_EBUG;
	}

	pe_ctx->state |= PERF_EVENTS_RUNNING;

	return PAPI_OK;

}

/* Stop all of the counters */
int
_pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
{
	SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl);

	int ret;
	int i;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* Just disable the group leaders */
	for ( i = 0; i < pe_ctl->num_events; i++ ) {
		if ( pe_ctl->events[i].group_leader_fd == -1 ) {
			ret = ioctl( pe_ctl->events[i].event_fd,
					PERF_EVENT_IOC_DISABLE, NULL);
			if ( ret == -1 ) {
				PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
					"returned error, Linux says: %s",
					pe_ctl->events[i].event_fd,
					strerror( errno ) );
				return PAPI_EBUG;
			}
		}
	}

	pe_ctx->state &= ~PERF_EVENTS_RUNNING;

	SUBDBG( "EXIT:\n");
	return PAPI_OK;
}

/* This function clears the current contents of the control structure and
   updates it with whatever resources are allocated for all the native
   events in the native info structure array. */

int
_pe_update_control_state( hwd_control_state_t *ctl, NativeInfo_t *native,
			  int count, hwd_context_t *ctx )
{
	SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n",
		ctl, native, count, ctx);
	int i;
	int j;
	int ret;
	int skipped_events=0;
	struct native_event_t *ntv_evt;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* close all of the existing fds and start over again */
	/* In theory we could have finer-grained control and know if */
	/* things were changed, but it's easier to tear things down and */
	/* rebuild. */
	close_pe_events( pe_ctx, pe_ctl );

	/* Calling with count==0 should be OK, it's how things are */
	/* deallocated when an eventset is destroyed. */
	if ( count == 0 ) {
		SUBDBG( "EXIT: Called with count == 0\n" );
		return PAPI_OK;
	}

	/* set up all the events */
	for( i = 0; i < count; i++ ) {
		if ( native ) {
			// get the native event pointer used for this papi event
			int ntv_idx = _papi_hwi_get_ntv_idx(
					(unsigned)(native[i].ni_papi_code));
			if (ntv_idx < -1) {
				SUBDBG("papi_event_code: %#x known by papi but not by the component\n",
					native[i].ni_papi_code);
				skipped_events++;
				continue;
			}
			// if the native index is -1, then we have an event without
			// a mask and need to find the right native index to use
			if (ntv_idx == -1) {
				// find the native event index we want by matching
				// for the right papi event code
				for (j=0 ; j<pe_ctx->event_table->num_native_events ; j++) {
					if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) {
						ntv_idx = j;
					}
				}
			}

			// if the native index is still negative, we did not find
			// the event we wanted, so skip it
			if (ntv_idx < 0) {
				SUBDBG("papi_event_code: %#x not found in native event tables\n",
					native[i].ni_papi_code);
				skipped_events++;
				continue;
			}

			// the native index is positive, so there was a mask with
			// the event; ntv_idx identifies which native event to use
			ntv_evt = (struct native_event_t *)(&(pe_ctx->event_table->native_events[ntv_idx]));
			SUBDBG("ntv_evt: %p\n", ntv_evt);

			SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n",
				i, pe_ctx->event_table->num_native_events);

			// Move this event's hardware config values and other
			// attributes to the perf_events attribute structure
			memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr,
				sizeof(perf_event_attr_t));

			// We may need to update the attribute structure with
			// information from event set level domain settings (values
			// set by PAPI_set_domain).  This is only done if the event
			// mask which controls each counting domain was not provided.

			// get a pointer to the allocated name; it will be NULL when
			// adding preset events to an event set
			char *aName = ntv_evt->allocated_name;
			if ((aName == NULL) || (strstr(aName, ":u=") == NULL)) {
				SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n",
					pe_ctl->events[i].attr.exclude_user,
					!(pe_ctl->domain & PAPI_DOM_USER));
				pe_ctl->events[i].attr.exclude_user =
					!(pe_ctl->domain & PAPI_DOM_USER);
			}
			if ((aName == NULL) || (strstr(aName, ":k=") == NULL)) {
				SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n",
					pe_ctl->events[i].attr.exclude_kernel,
					!(pe_ctl->domain & PAPI_DOM_KERNEL));
				pe_ctl->events[i].attr.exclude_kernel =
					!(pe_ctl->domain & PAPI_DOM_KERNEL);
			}
			pe_ctl->events[i].attr.exclude_guest = 1;

			// libpfm4 supports mh (monitor host) and mg (monitor guest)
			// event masks; perf_events supports exclude_hv and
			// exclude_idle attributes; PAPI_set_domain supports
			// PAPI_DOM_SUPERVISOR and PAPI_DOM_OTHER domain attributes.
			// It is not clear how these libpfm4 masks, perf_event
			// attributes, and PAPI domain attributes relate to each
			// other; the commented-out code below is one possibility.
//			if (strstr(ntv_evt->allocated_name, ":mg=") == NULL) {
//				SUBDBG("set exclude_hv attribute from eventset level domain flags, encode: %d, eventset: %d\n",
//					pe_ctl->events[i].attr.exclude_hv,
//					!(pe_ctl->domain & PAPI_DOM_SUPERVISOR));
//				pe_ctl->events[i].attr.exclude_hv =
//					!(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
//			}

			// set the cpu number provided with an event mask if there
			// was one (it will be -1 if the mask was not provided)
			pe_ctl->events[i].cpu = ntv_evt->cpu;
			// if the cpu event mask was not provided, then set the cpu
			// to use to what may have been set on a call to
			// PAPI_set_opt (it will still be -1 if not called)
			if (pe_ctl->events[i].cpu == -1) {
				pe_ctl->events[i].cpu = pe_ctl->cpu;
			}
		} else {
			// This case happens when called from _pe_set_overflow and
			// _pe_ctl.  Those callers put things directly into the
			// pe_ctl structure, so it is already set up for the open
			// call.
		}

		// Copy the inherit flag into the attribute block that will be
		// passed to the kernel
		pe_ctl->events[i].attr.inherit = pe_ctl->inherit;

		/* Set the position in the native structure */
		/* We just set up events linearly */
		if ( native ) {
			native[i].ni_position = i;
			SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n",
				i, &(native[i]), native[i].ni_papi_code,
				native[i].ni_event, native[i].ni_position,
				native[i].ni_owners);
		}
	}

	if (count <= skipped_events) {
		SUBDBG("EXIT: No events to count, they all contained invalid umasks\n");
		return PAPI_ENOEVNT;
	}

	pe_ctl->num_events = count - skipped_events;

	/* actually open the events */
	/* (why is this a separate function?) */
	ret = open_pe_events( pe_ctx, pe_ctl );
	if ( ret != PAPI_OK ) {
		SUBDBG("EXIT: open_pe_events returned: %d\n", ret);
		/* Restore values? */
		return ret;
	}

	SUBDBG( "EXIT: PAPI_OK\n" );
	return PAPI_OK;
}

/* Set various options on a control state */
int
_pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
{
	int ret;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = NULL;

	switch ( code ) {
	case PAPI_MULTIPLEX:
		pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
					pe_ctl->granularity,
					1, pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}

		/* looks like we are allowed, so set the multiplexed attribute */
		pe_ctl->multiplexed = 1;
		ret = _pe_update_control_state( pe_ctl, NULL,
						pe_ctl->num_events, pe_ctx );
		if (ret != PAPI_OK) {
			pe_ctl->multiplexed = 0;
		}
		return ret;

	case PAPI_ATTACH:
		pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
		ret = check_permissions( option->attach.tid, pe_ctl->cpu,
					pe_ctl->domain, pe_ctl->granularity,
					pe_ctl->multiplexed,
					pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}

		pe_ctl->tid = option->attach.tid;

		/* If events have already been added, something may */
		/* have been done to the kernel, so update */
		ret = _pe_update_control_state( pe_ctl, NULL,
						pe_ctl->num_events, pe_ctx);

		return ret;

	case PAPI_DETACH:
		pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );

		pe_ctl->tid = 0;
		return PAPI_OK;

	case PAPI_CPU_ATTACH:
		pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, option->cpu.cpu_num,
					pe_ctl->domain, pe_ctl->granularity,
					pe_ctl->multiplexed,
					pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}
		/* looks like we are allowed, so set the cpu number */

		/* this tells the kernel not to count for a thread */
		/* should we warn if we try to set both?  perf_event */
		/* will reject it. */
		pe_ctl->tid = -1;

		pe_ctl->cpu = option->cpu.cpu_num;

		return PAPI_OK;

	case PAPI_DOMAIN:
		pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, pe_ctl->cpu,
					option->domain.domain,
					pe_ctl->granularity,
					pe_ctl->multiplexed,
					pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}
		/* looks like we are allowed, so set the event set level */
		/* counting domains */
		pe_ctl->domain = option->domain.domain;
		return PAPI_OK;

	case PAPI_GRANUL:
		pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );

		/* FIXME: we really don't support this yet */

		switch ( option->granularity.granularity ) {
		case PAPI_GRN_PROCG:
		case PAPI_GRN_SYS_CPU:
		case PAPI_GRN_PROC:
			return PAPI_ECMP;

		/* Currently we only support thread and CPU granularity */
		case PAPI_GRN_SYS:
			pe_ctl->granularity=PAPI_GRN_SYS;
			break;

		case PAPI_GRN_THR:
			pe_ctl->granularity=PAPI_GRN_THR;
			break;

		default:
			return PAPI_EINVAL;
		}
		return PAPI_OK;

	case PAPI_INHERIT:
		pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
					pe_ctl->granularity, pe_ctl->multiplexed,
					option->inherit.inherit );
		if (ret != PAPI_OK) {
			return ret;
		}
		/* looks like we are allowed, so set the requested inheritance */
		if (option->inherit.inherit) {
			/* children will inherit counters */
			pe_ctl->inherit = 1;
		} else {
			/* children won't inherit counters */
			pe_ctl->inherit = 0;
		}
		return PAPI_OK;

	case PAPI_DATA_ADDRESS:
		return PAPI_ENOSUPP;
#if 0
		pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
		ret = set_default_domain( pe_ctl, option->address_range.domain );
		if ( ret != PAPI_OK ) {
			return ret;
		}
		set_drange( pe_ctx, pe_ctl, option );
		return PAPI_OK;
#endif
	case PAPI_INSTR_ADDRESS:
		return PAPI_ENOSUPP;
#if 0
		pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
		ret = set_default_domain( pe_ctl, option->address_range.domain );
		if ( ret != PAPI_OK ) {
			return ret;
		}
		set_irange( pe_ctx, pe_ctl, option );
		return PAPI_OK;
#endif

	case PAPI_DEF_ITIMER:
		/* What should we be checking for here? */
		/* This seems like it should be OS-specific, */
		/* not component-specific. */

		return PAPI_OK;

	case PAPI_DEF_MPX_NS:
		/* Defining a given ns per set is not currently supported */
		return PAPI_ENOSUPP;

	case PAPI_DEF_ITIMER_NS:
		/* We don't support this... */
		return PAPI_OK;

	default:
		return PAPI_ENOSUPP;
	}
}
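/* For illustration, a sketch of how a user reaches the PAPI_CPU_ATTACH
 * case above (error checking omitted):
 *
 *     PAPI_option_t opt;
 *     opt.cpu.eventset = EventSet;    // an event set created earlier
 *     opt.cpu.cpu_num  = 2;           // count only on CPU 2
 *     PAPI_set_opt( PAPI_CPU_ATTACH, &opt );
 *
 * PAPI routes this through _pe_ctl(), which checks permissions and then
 * records the cpu number to be used when the events are (re)opened. */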

/* Initialize a thread */
int
_pe_init_thread( hwd_context_t *hwd_ctx )
{
	pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;

	/* clear the context structure and mark as initialized */
	memset( pe_ctx, 0, sizeof ( pe_context_t ) );
	pe_ctx->initialized=1;
	pe_ctx->event_table=&perf_native_event_table;
	pe_ctx->cidx=our_cidx;

	return PAPI_OK;
}

/* Initialize a new control state */
int
_pe_init_control_state( hwd_control_state_t *ctl )
{
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* clear the contents */
	memset( pe_ctl, 0, sizeof ( pe_control_t ) );

	/* Set the domain */
	_pe_set_domain( ctl, _perf_event_vector.cmp_info.default_domain );

	/* default granularity */
	pe_ctl->granularity = _perf_event_vector.cmp_info.default_granularity;

	/* overflow signal */
	pe_ctl->overflow_signal = _perf_event_vector.cmp_info.hardware_intr_sig;

	pe_ctl->cidx=our_cidx;

	/* Set the cpu number in the control block to show that the */
	/* events are not tied to a specific cpu */
	pe_ctl->cpu = -1;
	return PAPI_OK;
}

/* Check the mmap page for rdpmc support */
static int _pe_detect_rdpmc(int default_domain) {

	struct perf_event_attr pe;
	int fd, rdpmc_exists = 1;
	void *addr;
	struct perf_event_mmap_page *our_mmap;

	/* Create a fake instructions event so we can read a mmap page */
	memset(&pe, 0, sizeof(struct perf_event_attr));

	pe.type = PERF_TYPE_HARDWARE;
	pe.size = sizeof(struct perf_event_attr);
	pe.config = PERF_COUNT_HW_INSTRUCTIONS;

	/* There should probably be a helper function to handle this. */
	/* We break on some ARM because there is no support for */
	/* excluding the kernel. */
	if (!(default_domain & PAPI_DOM_KERNEL)) {
		pe.exclude_kernel = 1;
	}
	fd = sys_perf_event_open(&pe, 0, -1, -1, 0);
	if (fd < 0) {
		return PAPI_ESYS;
	}

	/* create the mmap page */
	addr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (addr == (void *)(-1)) {
		close(fd);
		return PAPI_ESYS;
	}

	/* get the rdpmc info */
	our_mmap = (struct perf_event_mmap_page *)addr;
	if (our_mmap->cap_usr_rdpmc == 0) {
		rdpmc_exists = 0;
	}

	/* close the fake event */
	munmap(addr, 4096);
	close(fd);

	return rdpmc_exists;
}


/* Initialize the perf_event component */
int
_pe_init_component( int cidx )
{

	int retval;
	int paranoid_level;

	FILE *fff;

	our_cidx=cidx;

	/* This is the official way to detect if perf_event support exists */
	/* The file is called perf_counter_paranoid on 2.6.31 */
	/* currently we are lazy and do not support 2.6.31 kernels */
	fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
	if (fff==NULL) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"perf_event support not detected",PAPI_MAX_STR_LEN);
		return PAPI_ENOCMP;
	}

	/* 2 means no kernel measurements allowed */
	/* 1 means normal counter access */
	/* 0 means you can access CPU-specific data */
	/* -1 means no restrictions */
	retval=fscanf(fff,"%d",&paranoid_level);
	if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
	fclose(fff);

	if ((paranoid_level==2) && (getuid()!=0)) {
		SUBDBG("/proc/sys/kernel/perf_event_paranoid prohibits kernel counts");
		_papi_hwd[cidx]->cmp_info.available_domains &= ~PAPI_DOM_KERNEL;
	}

	/* Detect the NMI watchdog, which can steal counters */
	nmi_watchdog_active=_linux_detect_nmi_watchdog();
	if (nmi_watchdog_active) {
		SUBDBG("The Linux nmi_watchdog is using one of the performance "
			"counters, reducing the total number available.\n");
	}
	/* Kernel multiplexing is broken prior to kernel 2.6.34 */
	/* The fix was probably git commit: */
	/* 45e16a6834b6af098702e5ea6c9a40de42ff77d8 */
	if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
		_papi_hwd[cidx]->cmp_info.kernel_multiplex = 0;
		_papi_hwd[cidx]->cmp_info.num_mpx_cntrs = PAPI_MAX_SW_MPX_EVENTS;
	}
	else {
		_papi_hwd[cidx]->cmp_info.kernel_multiplex = 1;
		_papi_hwd[cidx]->cmp_info.num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS;
	}

	/* Check that the processor is supported */
	if (processor_supported(_papi_hwi_system_info.hw_info.vendor,
				_papi_hwi_system_info.hw_info.cpuid_family) !=
		PAPI_OK) {
		fprintf(stderr,"warning, your processor is unsupported\n");
		/* should not return an error, as software events should */
		/* still work */
	}

	/* Setup mmtimers, if appropriate */
	retval=mmtimer_setup();
	if (retval) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"Error initializing mmtimer",PAPI_MAX_STR_LEN);
		return retval;
	}

	/* Set the overflow signal */
	_papi_hwd[cidx]->cmp_info.hardware_intr_sig = SIGRTMIN + 2;

	/* Run Vendor-specific fixups */
	pe_vendor_fixups(_papi_hwd[cidx]);

	/* Detect if we can use rdpmc (or an equivalent) */
	/* We currently do not use rdpmc, as it is slower in tests */
	/* than a regular read (as of Linux 3.5) */
	retval=_pe_detect_rdpmc(_papi_hwd[cidx]->cmp_info.default_domain);
	if (retval < 0 ) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"sys_perf_event_open() failed, perf_event support for this platform may be broken",PAPI_MAX_STR_LEN);

		return retval;
	}
	_papi_hwd[cidx]->cmp_info.fast_counter_read = retval;

	/* Run the libpfm4-specific setup */
	retval = _papi_libpfm4_init(_papi_hwd[cidx]);
	if (retval) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"Error initializing libpfm4",PAPI_MAX_STR_LEN);
		return retval;
	}

	retval = _pe_libpfm4_init(_papi_hwd[cidx], cidx,
				&perf_native_event_table,
				PMU_TYPE_CORE | PMU_TYPE_OS);
	if (retval) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"Error initializing libpfm4",PAPI_MAX_STR_LEN);
		return retval;
	}

	return PAPI_OK;

}

/* Shutdown the perf_event component */
int
_pe_shutdown_component( void ) {

	/* deallocate our event table */
	_pe_libpfm4_shutdown(&_perf_event_vector, &perf_native_event_table);

	/* Shutdown libpfm4 */
	_papi_libpfm4_shutdown();

	return PAPI_OK;
}



int
_pe_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
{
	return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier,
					   &perf_native_event_table);
}

int
_pe_ntv_name_to_code( char *name, unsigned int *event_code) {
	return _pe_libpfm4_ntv_name_to_code(name, event_code,
					    &perf_native_event_table);
}

int
_pe_ntv_code_to_name(unsigned int EventCode,
		     char *ntv_name, int len) {
	return _pe_libpfm4_ntv_code_to_name(EventCode,
					    ntv_name, len,
					    &perf_native_event_table);
}

int
_pe_ntv_code_to_descr( unsigned int EventCode,
		       char *ntv_descr, int len) {

	return _pe_libpfm4_ntv_code_to_descr(EventCode, ntv_descr, len,
					     &perf_native_event_table);
}

int
_pe_ntv_code_to_info(unsigned int EventCode,
		     PAPI_event_info_t *info) {

	return _pe_libpfm4_ntv_code_to_info(EventCode, info,
					    &perf_native_event_table);
}

/* These functions are based on builtin-record.c in the */
/* kernel's tools/perf directory. */

static uint64_t
mmap_read_head( pe_event_info_t *pe )
{
	struct perf_event_mmap_page *pc = pe->mmap_buf;
	uint64_t head;

	if ( pc == NULL ) {
		PAPIERROR( "perf_event_mmap_page is NULL" );
		return 0;
	}

	head = pc->data_head;
	rmb( );

	return head;
}

static void
mmap_write_tail( pe_event_info_t *pe, uint64_t tail )
{
	struct perf_event_mmap_page *pc = pe->mmap_buf;

	/* ensure all reads are done before we write the tail out. */
	pc->data_tail = tail;
}
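/* Together these two helpers implement the reader side of the perf_event
 * ring-buffer protocol: read data_head (with a read barrier so the sample
 * data is visible before we look at it), consume records, then publish
 * data_tail so the kernel knows that space can be reused.  A sketch of a
 * consume loop (mmap_read() below is the real one):
 *
 *     uint64_t head = mmap_read_head( pe );
 *     while ( pe->tail != head ) {
 *         ... process the record at (pe->tail & pe->mask) ...
 *     }
 *     mmap_write_tail( pe, head );
 */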


/* Does the kernel define these somewhere? */
struct ip_event {
	struct perf_event_header header;
	uint64_t ip;
};
struct lost_event {
	struct perf_event_header header;
	uint64_t id;
	uint64_t lost;
};
typedef union event_union {
	struct perf_event_header header;
	struct ip_event ip;
	struct lost_event lost;
} perf_sample_event_t;
1807 /* Should re-write with comments if we ever figure out what's */
1808 /* going on here. */
1809 static void
1811  int profile_index )
1812 {
1813  uint64_t head = mmap_read_head( pe );
1814  uint64_t old = pe->tail;
1815  unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( );
1816  int diff;
1817 
1818  diff = head - old;
1819  if ( diff < 0 ) {
1820  SUBDBG( "WARNING: failed to keep up with mmap data. head = %" PRIu64
1821  ", tail = %" PRIu64 ". Discarding samples.\n", head, old );
1822  /* head points to a known good entry, start there. */
1823  old = head;
1824  }
1825 
1826  for( ; old != head; ) {
1827  perf_sample_event_t *event = ( perf_sample_event_t * )
1828  & data[old & pe->mask];
1829  perf_sample_event_t event_copy;
1830  size_t size = event->header.size;
1831 
1832  /* Event straddles the mmap boundary -- header should always */
1833  /* be inside due to u64 alignment of output. */
1834  if ( ( old & pe->mask ) + size != ( ( old + size ) & pe->mask ) ) {
1835  uint64_t offset = old;
1836  uint64_t len = min( sizeof ( *event ), size ), cpy;
1837  void *dst = &event_copy;
1838 
1839  do {
1840  cpy = min( pe->mask + 1 - ( offset & pe->mask ), len );
1841  memcpy( dst, &data[offset & pe->mask], cpy );
1842  offset += cpy;
1843  dst = ((unsigned char*)dst) + cpy;
1844  len -= cpy;
1845  } while ( len );
1846 
1847  event = &event_copy;
1848  }
1849  old += size;
1850 
1851  SUBDBG( "event->type = %08x\n", event->header.type );
1852  SUBDBG( "event->size = %d\n", event->header.size );
1853 
1854  switch ( event->header.type ) {
1855  case PERF_RECORD_SAMPLE:
1856  _papi_hwi_dispatch_profile( ( *thr )->running_eventset[cidx],
1857  ( caddr_t ) ( unsigned long ) event->ip.ip,
1858  0, profile_index );
1859  break;
1860 
1861  case PERF_RECORD_LOST:
1862  SUBDBG( "Warning: because of a mmap buffer overrun, %" PRId64
1863  " events were lost.\n"
1864  "Loss was recorded when counter id %#"PRIx64
1865  " overflowed.\n", event->lost.lost, event->lost.id );
1866  break;
1867 
1868  default:
1869  SUBDBG( "Error: unexpected header type - %d\n",
1870  event->header.type );
1871  break;
1872  }
1873  }
1874 
1875  pe->tail = old;
1876  mmap_write_tail( pe, old );
1877 }
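/* --- Illustrative sketch (not part of the original file) ---------------
 * The do/while memcpy loop above stitches together a record that wraps
 * past the end of the circular data area. The same technique in
 * isolation: copy len bytes starting at logical offset off out of a
 * power-of-two circular buffer of mask+1 bytes. */

static void
circular_copy_sketch( void *dst, const unsigned char *data,
                      uint64_t off, uint64_t mask, size_t len )
{
        unsigned char *out = dst;

        while ( len ) {
                /* bytes left before the physical end of the buffer */
                size_t chunk = mask + 1 - ( off & mask );
                if ( chunk > len ) chunk = len;
                memcpy( out, &data[off & mask], chunk );
                off += chunk;
                out += chunk;
                len -= chunk;
        }
}
/* --- end sketch ------------------------------------------------------- */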
1878 
1879 /* Find a native event specified by a profile index */
1880 static int
1881 find_profile_index( EventSetInfo_t *ESI, int evt_idx, int *flags,
1882  unsigned int *native_index, int *profile_index )
1883 {
1884  int pos, esi_index, count;
1885 
1886  for ( count = 0; count < ESI->profile.event_counter; count++ ) {
1887  esi_index = ESI->profile.EventIndex[count];
1888  pos = ESI->EventInfoArray[esi_index].pos[0];
1889 
1890  if ( pos == evt_idx ) {
1891  *profile_index = count;
1892  *native_index = ESI->NativeInfoArray[pos].ni_event &
1893  PAPI_NATIVE_AND_MASK;
1894  *flags = ESI->profile.flags;
1895  SUBDBG( "Native event %d is at profile index %d, flags %d\n",
1896  *native_index, *profile_index, *flags );
1897  return PAPI_OK;
1898  }
1899  }
1900  PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d", count,
1901  ESI->profile.event_counter );
1902  return PAPI_EBUG;
1903 }
1904 
1905 
1906 
1907 /* What exactly does this do? */
1908 static int
1909 process_smpl_buf( int evt_idx, ThreadInfo_t **thr, int cidx )
1910 {
1911  int ret, flags, profile_index;
1912  unsigned native_index;
1913  pe_control_t *ctl;
1914 
1915  ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx,
1916  &flags, &native_index, &profile_index );
1917  if ( ret != PAPI_OK ) {
1918  return ret;
1919  }
1920 
1921  ctl= (*thr)->running_eventset[cidx]->ctl_state;
1922 
1923  mmap_read( cidx, thr,
1924  &(ctl->events[evt_idx]),
1925  profile_index );
1926 
1927  return PAPI_OK;
1928 }
1929 
1930 /*
1931  * This function is used when hardware overflows are working or when
1932  * software overflows are forced
1933  */
1934 
1935 void
1936 _pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc)
1937 {
1938  ( void ) n; /*unused */
1939  _papi_hwi_context_t hw_context;
1940  int found_evt_idx = -1, fd = info->si_fd;
1941  caddr_t address;
1942  ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 );
1943  int i;
1944  pe_control_t *ctl;
1945  int cidx = _perf_event_vector.cmp_info.CmpIdx;
1946 
1947  if ( thread == NULL ) {
1948  PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd );
1949  return;
1950  }
1951 
1952  if ( thread->running_eventset[cidx] == NULL ) {
1953  PAPIERROR( "thread->running_eventset == NULL in "
1954  "_papi_pe_dispatch_timer for fd %d!",fd );
1955  return;
1956  }
1957 
1958  if ( thread->running_eventset[cidx]->overflow.flags == 0 ) {
1959  PAPIERROR( "thread->running_eventset->overflow.flags == 0 in "
1960  "_papi_pe_dispatch_timer for fd %d!", fd );
1961  return;
1962  }
1963 
1964  hw_context.si = info;
1965  hw_context.ucontext = ( hwd_ucontext_t * ) uc;
1966 
1967  if ( thread->running_eventset[cidx]->overflow.flags &
1968  PAPI_OVERFLOW_FORCE_SW ) {
1969  address = GET_OVERFLOW_ADDRESS( hw_context );
1970  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
1971  address, NULL, 0,
1972  0, &thread, cidx );
1973  return;
1974  }
1975 
1976  if ( thread->running_eventset[cidx]->overflow.flags !=
1977  PAPI_OVERFLOW_HARDWARE ) {
1978  PAPIERROR( "thread->running_eventset->overflow.flags is set to "
1979  "something other than PAPI_OVERFLOW_HARDWARE or "
1980  "PAPI_OVERFLOW_FORCE_SW for fd %d (%#x)",
1981  fd , thread->running_eventset[cidx]->overflow.flags);
1982  }
1983 
1984  /* convoluted way to get ctl */
1985  ctl= thread->running_eventset[cidx]->ctl_state;
1986 
1987  /* See if the fd is one that's part of this thread's context */
1988  for( i=0; i < ctl->num_events; i++ ) {
1989  if ( fd == ctl->events[i].event_fd ) {
1990  found_evt_idx = i;
1991  break;
1992  }
1993  }
1994 
1995  if ( found_evt_idx == -1 ) {
1996  PAPIERROR( "Unable to find fd %d among the open event fds "
1997  "in _papi_hwi_dispatch_timer!", fd );
1998  return;
1999  }
2000 
2001  if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) {
2002  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed");
2003  }
2004 
2005  if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) &&
2006  !( thread->running_eventset[cidx]->profile.flags &
2007  PAPI_PROFIL_FORCE_SW ) ) {
2008  process_smpl_buf( found_evt_idx, &thread, cidx );
2009  }
2010  else {
2011  uint64_t ip;
2012  uint64_t head;
2013  pe_event_info_t *pe = &(ctl->events[found_evt_idx]);
2014  unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( );
2015 
2016  /*
2017  * Read up the most recent IP from the sample in the mmap buffer. To
2018  * do this, we make the assumption that all of the records in the
2019  * mmap buffer are the same size, and that they all contain the IP as
2020  * their only record element. This means that we can use the
2021  * data_head element from the user page and move backward one record
2022  * from that point and read the data. Since we don't actually need
2023  * to access the header of the record, we can just subtract 8 (size
2024  * of the IP) from data_head and read up that word from the mmap
2025  * buffer. After we subtract 8, we account for mmap buffer wrapping
2026  * by AND'ing this offset with the buffer mask.
2027  */
2028  head = mmap_read_head( pe );
2029 
2030  if ( head == 0 ) {
2031  PAPIERROR( "Attempting to access memory which may be inaccessible" );
2032  return;
2033  }
2034  ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) );
2035  /*
2036  * Update the tail to the current head pointer.
2037  *
2038  * Note that if we were to read the record at the tail pointer,
2039  * rather than the one at the head (as you might otherwise think
2040  * would be natural), we could run into problems. Signals don't
2041  * stack well on Linux, particularly if not using RT signals, and if
2042  * they come in rapidly enough, we can lose some. Over time, the head
2043  * could catch up to the tail and monitoring would be stopped, and
2044  * since no more signals are coming in, this problem will never be
2045  * resolved, resulting in a complete loss of overflow notification
2046  * from that point on. So the solution we use here will result in
2047  * only the most recent IP value being read every time there are two
2048  * or more samples in the buffer (for that one overflow signal). But
2049  * the handler will always bring up the tail, so the head should
2050  * never run into the tail.
2051  */
2052  mmap_write_tail( pe, head );
2053 
2054  /*
2055  * The fourth parameter is supposed to be a vector of bits indicating
2056  * the overflowed hardware counters, but it's not really clear that
2057  * it's useful, because the actual hardware counters used are not
2058  * exposed to the PAPI user. For now, I'm just going to set the bit
2059  * that indicates which event register in the array overflowed. The
2060  * result is that the overflow vector will not be identical to the
2061  * perfmon implementation, and part of that is due to the fact that
2062  * which hardware register is actually being used is opaque at the
2063  * user level (the kernel event dispatcher hides that info).
2064  */
2065 
2066  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
2067  ( caddr_t ) ( unsigned long ) ip,
2068  NULL, ( 1 << found_evt_idx ), 0,
2069  &thread, cidx );
2070 
2071  }
2072 
2073  /* Restart the counters */
2074  if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) {
2075  PAPIERROR( "overflow refresh failed" );
2076  }
2077 }
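/* --- Illustrative sketch (not part of the original file) ---------------
 * The "head - 8" arithmetic in the handler above depends on the
 * assumptions spelled out in its comment: every record in the buffer
 * is a PERF_SAMPLE_IP-only PERF_RECORD_SAMPLE, records are u64-aligned,
 * and at least one sample has been written, so the 8 bytes preceding
 * data_head are always the most recent IP and can never straddle the
 * buffer end. The same trick in isolation: */

static uint64_t
last_sample_ip_sketch( const unsigned char *data, uint64_t head,
                       uint64_t mask )
{
        /* step back one IP-sized word, wrapping within the buffer */
        uint64_t off = ( head - sizeof(uint64_t) ) & mask;
        uint64_t ip;

        /* memcpy rather than a direct deref, to stay strictly portable */
        memcpy( &ip, &data[off], sizeof(ip) );
        return ip;
}
/* --- end sketch ------------------------------------------------------- */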
2078 
2079 /* Stop profiling */
2080 int
2081 _pe_stop_profiling( ThreadInfo_t *thread, EventSetInfo_t *ESI )
2082 {
2083  int i, ret = PAPI_OK;
2084  pe_control_t *ctl;
2085  int cidx;
2086 
2087  ctl=ESI->ctl_state;
2088 
2089  cidx=ctl->cidx;
2090 
2091  /* Loop through all of the events and process those which have mmap */
2092  /* buffers attached. */
2093  for ( i = 0; i < ctl->num_events; i++ ) {
2094  /* Use the mmap_buf field as an indicator of this fd being used for */
2095  /* profiling. */
2096  if ( ctl->events[i].mmap_buf ) {
2097  /* Process any remaining samples in the sample buffer */
2098  ret = process_smpl_buf( i, &thread, cidx );
2099  if ( ret ) {
2100  PAPIERROR( "process_smpl_buf returned error %d", ret );
2101  return ret;
2102  }
2103  }
2104  }
2105  return ret;
2106 }
2107 
2108 /* Setup an event to cause overflow */
2109 int
2110 _pe_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
2111 {
2112  SUBDBG("ENTER: ESI: %p, EventIndex: %d, threshold: %d\n", ESI, EventIndex, threshold);
2113 
2114  pe_context_t *ctx;
2115  pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state );
2116  int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK;
2117  int cidx;
2118 
2119  cidx = ctl->cidx;
2120  ctx = ( pe_context_t *) ( ESI->master->context[cidx] );
2121 
2122  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2123 
2124  SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n",
2125  evt_idx,EventIndex,ESI->EventSetIndex);
2126 
2127  if (evt_idx<0) {
2128  SUBDBG("EXIT: evt_idx: %d\n", evt_idx);
2129  return PAPI_EINVAL;
2130  }
2131 
2132  if ( threshold == 0 ) {
2133  /* If this counter isn't set to overflow, it's an error */
2134  if ( ctl->events[evt_idx].attr.sample_period == 0 ) {
2135  SUBDBG("EXIT: PAPI_EINVAL, Tried to clear sample threshold when it was not set\n");
2136  return PAPI_EINVAL;
2137  }
2138  }
2139 
2140  ctl->events[evt_idx].attr.sample_period = threshold;
2141 
2142  /*
2143  * Note that the wakeup_mode field initially will be set to zero
2144  * (WAKEUP_MODE_COUNTER_OVERFLOW) as a result of a call to memset 0 to
2145  * all of the events in the ctl struct.
2146  *
2147  * Is it even set to any other value elsewhere?
2148  */
2149  switch ( ctl->events[evt_idx].wakeup_mode ) {
2150  case WAKEUP_MODE_PROFILING:
2151  /* Setting wakeup_events to special value zero means issue a */
2152  /* wakeup (signal) on every mmap page overflow. */
2153  ctl->events[evt_idx].attr.wakeup_events = 0;
2154  break;
2155 
2156  case WAKEUP_MODE_COUNTER_OVERFLOW:
2157  /* Can this code ever be called? */
2158 
2159  /* Setting wakeup_events to one means issue a wakeup on every */
2160  /* counter overflow (not mmap page overflow). */
2161  ctl->events[evt_idx].attr.wakeup_events = 1;
2162  /* We need the IP to pass to the overflow handler */
2163  ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP;
2164  /* one for the user page, and two to take IP samples */
2165  ctl->events[evt_idx].nr_mmap_pages = 1 + 2;
2166  break;
2167  default:
2168  PAPIERROR( "ctl->wakeup_mode[%d] set to an unknown value - %u",
2169  evt_idx, ctl->events[evt_idx].wakeup_mode);
2170  SUBDBG("EXIT: PAPI_EBUG\n");
2171  return PAPI_EBUG;
2172  }
2173 
2174  /* Check for non-zero sample period */
2175  for ( i = 0; i < ctl->num_events; i++ ) {
2176  if ( ctl->events[i].attr.sample_period ) {
2177  found_non_zero_sample_period = 1;
2178  break;
2179  }
2180  }
2181 
2182  if ( found_non_zero_sample_period ) {
2183  /* turn on internal overflow flag for this event set */
2184  ctl->overflow = 1;
2185 
2186  /* Enable the signal handler */
2187  retval = _papi_hwi_start_signal(
2188  ctl->overflow_signal,
2189  1, ctl->cidx );
2190  if (retval != PAPI_OK) {
2191  SUBDBG("Call to _papi_hwi_start_signal returned: %d\n", retval);
2192  }
2193  } else {
2194  /* turn off internal overflow flag for this event set */
2195  ctl->overflow = 0;
2196 
2197  /* Remove the signal handler, if there are no remaining non-zero */
2198  /* sample_periods set */
2199  retval = _papi_hwi_stop_signal( ctl->overflow_signal );
2200  if ( retval != PAPI_OK ) {
2201  SUBDBG("Call to _papi_hwi_stop_signal returned: %d\n", retval);
2202  return retval;
2203  }
2204  }
2205 
2206  retval = _pe_update_control_state( ctl, NULL,
2207  ( (pe_control_t *) (ESI->ctl_state) )->num_events,
2208  ctx );
2209 
2210  SUBDBG("EXIT: return: %d\n", retval);
2211  return retval;
2212 }
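/* --- Illustrative sketch (not part of the original file) ---------------
 * For context: attr.sample_period and attr.wakeup_events (set above)
 * only control how often the kernel generates a wakeup; delivering
 * that wakeup as a signal to the right thread is fd plumbing done
 * elsewhere in this file (tune_up_fd). The usual Linux recipe looks
 * roughly like this; F_SETSIG and F_SETOWN_EX are Linux-specific
 * fcntls (glibc needs _GNU_SOURCE for them): */

static int
arm_overflow_signal_sketch( int fd, int signo )
{
        struct f_owner_ex owner;

        /* generate async notifications on counter wakeups */
        if ( fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK ) == -1 ) return -1;

        /* raise signo instead of the default SIGIO */
        if ( fcntl( fd, F_SETSIG, signo ) == -1 ) return -1;

        /* route the signal to this thread, not the whole process */
        owner.type = F_OWNER_TID;
        owner.pid = syscall( SYS_gettid );
        if ( fcntl( fd, F_SETOWN_EX, &owner ) == -1 ) return -1;

        return 0;
}
/* --- end sketch ------------------------------------------------------- */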
2213 
2214 /* Enable profiling */
2215 int
2216 _pe_set_profile( EventSetInfo_t *ESI, int EventIndex, int threshold )
2217 {
2218  int ret;
2219  int evt_idx;
2220  pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state );
2221 
2222  /* Since you can't profile on a derived event, the event is always the */
2223  /* first and only event in the native event list. */
2224  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2225 
2226  if ( threshold == 0 ) {
2227  SUBDBG( "MUNMAP(%p,%"PRIu64")\n", ctl->events[evt_idx].mmap_buf,
2228  ( uint64_t ) ctl->events[evt_idx].nr_mmap_pages *
2229  getpagesize( ) );
2230 
2231  if ( ctl->events[evt_idx].mmap_buf ) {
2232  munmap( ctl->events[evt_idx].mmap_buf,
2233  ctl->events[evt_idx].nr_mmap_pages * getpagesize() );
2234  }
2235  ctl->events[evt_idx].mmap_buf = NULL;
2236  ctl->events[evt_idx].nr_mmap_pages = 0;
2237  ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP;
2238  ret = _pe_set_overflow( ESI, EventIndex, threshold );
2239  /* ??? #warning "This should be handled somewhere else" */
2240  ESI->state &= ~( PAPI_OVERFLOWING );
2241  ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );
2242 
2243  return ret;
2244  }
2245 
2246  /* Look up the native event code */
2247  if ( ESI->profile.flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) {
2248  /* Not supported yet... */
2249 
2250  return PAPI_ENOSUPP;
2251  }
2252  if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
2253  /* This requires an ability to randomly alter the sample_period within */
2254  /* a given range. Kernel does not have this ability. FIXME */
2255  return PAPI_ENOSUPP;
2256  }
2257 
2258  /* Just a guess at how many pages would make this relatively efficient. */
2259  /* Note that it's "1 +" because of the need for a control page, and the */
2260  /* number following the "+" must be a power of 2 (1, 2, 4, 8, 16, etc.) or */
2261  /* zero. This is required to optimize dealing with circular buffer */
2262  /* wrapping of the mapped pages. */
2263 
2264  ctl->events[evt_idx].nr_mmap_pages = (1+8);
2265  ctl->events[evt_idx].attr.sample_type |= PERF_SAMPLE_IP;
2266 
2267  ret = _pe_set_overflow( ESI, EventIndex, threshold );
2268  if ( ret != PAPI_OK ) return ret;
2269 
2270  return PAPI_OK;
2271 }
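/* --- Illustrative sketch (not part of the original file) ---------------
 * The "1 + 8" above turns into an mmap of one control page (the
 * perf_event_mmap_page) followed by a power-of-two data area; keeping
 * the data size a power of two is what lets the ring-buffer code above
 * use "& mask" instead of a modulo. Roughly (the component performs
 * the real mapping when the events are opened): */

static void *
map_sample_buffer_sketch( int fd, uint32_t nr_mmap_pages, uint64_t *mask )
{
        size_t pgsz = getpagesize();
        void *buf;

        buf = mmap( NULL, nr_mmap_pages * pgsz,
                    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
        if ( buf == MAP_FAILED ) return NULL;

        /* data area excludes the leading control page */
        *mask = ( uint64_t ) ( nr_mmap_pages - 1 ) * pgsz - 1;
        return buf;
}
/* --- end sketch ------------------------------------------------------- */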
2272 
2273 
2274 /* Our component vector */
2275 
2276 papi_vector_t _perf_event_vector = {
2277  .cmp_info = {
2278  /* component information (unspecified values initialized to 0) */
2279  .name = "perf_event",
2280  .short_name = "perf",
2281  .version = "5.0",
2282  .description = "Linux perf_event CPU counters",
2283 
2284  .default_domain = PAPI_DOM_USER,
2285  .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
2286  .default_granularity = PAPI_GRN_THR,
2287  .available_granularities = PAPI_GRN_THR | PAPI_GRN_SYS,
2288 
2289  .hardware_intr = 1,
2290  .kernel_profile = 1,
2291 
2292  /* component specific cmp_info initializations */
2293  .fast_virtual_timer = 0,
2294  .attach = 1,
2295  .attach_must_ptrace = 1,
2296  .cpu = 1,
2297  .inherit = 1,
2298  .cntr_umasks = 1,
2299 
2300  },
2301 
2302  /* sizes of framework-opaque component-private structures */
2303  .size = {
2304  .context = sizeof ( pe_context_t ),
2305  .control_state = sizeof ( pe_control_t ),
2306  .reg_value = sizeof ( int ),
2307  .reg_alloc = sizeof ( int ),
2308  },
2309 
2310  /* function pointers in this component */
2311  .init_component = _pe_init_component,
2312  .shutdown_component = _pe_shutdown_component,
2313  .init_thread = _pe_init_thread,
2314  .init_control_state = _pe_init_control_state,
2315  .dispatch_timer = _pe_dispatch_timer,
2316 
2317  /* function pointers from the shared perf_event lib */
2318  .start = _pe_start,
2319  .stop = _pe_stop,
2320  .read = _pe_read,
2321  .shutdown_thread = _pe_shutdown_thread,
2322  .ctl = _pe_ctl,
2323  .update_control_state = _pe_update_control_state,
2324  .set_domain = _pe_set_domain,
2325  .reset = _pe_reset,
2326  .set_overflow = _pe_set_overflow,
2327  .set_profile = _pe_set_profile,
2328  .stop_profiling = _pe_stop_profiling,
2329  .write = _pe_write,
2330 
2331 
2332  /* from counter name mapper */
2333  .ntv_enum_events = _pe_ntv_enum_events,
2334  .ntv_name_to_code = _pe_ntv_name_to_code,
2335  .ntv_code_to_name = _pe_ntv_code_to_name,
2336  .ntv_code_to_descr = _pe_ntv_code_to_descr,
2337  .ntv_code_to_info = _pe_ntv_code_to_info,
2338 };
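/* --- Illustrative sketch (not part of the original file) ---------------
 * How the cmp_info fields above surface to applications through the
 * public API; a quick way to confirm the perf_event component is
 * present and not disabled (example application code, not component
 * code): */

#if 0
#include "papi.h"
#include <stdio.h>
#include <string.h>

int
find_perf_event_component( void )
{
        int cidx;

        if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT )
                return -1;

        for ( cidx = 0; cidx < PAPI_num_components(); cidx++ ) {
                const PAPI_component_info_t *cmp =
                        PAPI_get_component_info( cidx );
                if ( cmp && !strcmp( cmp->name, "perf_event" ) ) {
                        printf( "perf_event is component %d (%s)\n", cidx,
                                cmp->disabled ? "disabled" : "active" );
                        return cidx;
                }
        }
        return -1;
}
#endif
/* --- end sketch ------------------------------------------------------- */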