PAPI  5.6.0.0
perf_event.c
1 /*
2 * File: perf_event.c
3 *
4 * Author: Corey Ashford
5 * cjashfor@us.ibm.com
6 * - based upon perfmon.c written by -
7 * Philip Mucci
8 * mucci@cs.utk.edu
9 * Mods: Gary Mohr
10 * gary.mohr@bull.com
11 * Mods: Vince Weaver
12 * vweaver1@eecs.utk.edu
13 * Mods: Philip Mucci
14 * mucci@eecs.utk.edu
15 * Mods: Gary Mohr
16 * gary.mohr@bull.com
17 * Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4.
18 * This adds several new event masks, including cpu=, u=, and k= which give the user
19 * the ability to set cpu number to use or control the domain (user, kernel, or both)
20 * in which the counter should be incremented. These are event masks so it is now
21 * possible to have multiple events in the same event set that count activity from
22 * different CPUs or count activity in different domains.
23 */
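/* Example (editor's sketch, not from the original file): the cpu=, u=, and
 * k= masks described above are used through PAPI's name-based interface.
 * The exact event spellings below are illustrative and not verified
 * against libpfm4:
 *
 *     int es = PAPI_NULL;
 *     PAPI_library_init( PAPI_VER_CURRENT );
 *     PAPI_create_eventset( &es );
 *     // user-domain-only instructions
 *     PAPI_add_named_event( es, "PERF_COUNT_HW_INSTRUCTIONS:u=1:k=0" );
 *     // the same event, pinned to cpu 2
 *     PAPI_add_named_event( es, "PERF_COUNT_HW_INSTRUCTIONS:cpu=2" );
 */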
24 
25 
26 #include <fcntl.h>
27 #include <string.h>
28 #include <errno.h>
29 #include <signal.h>
30 #include <syscall.h>
31 #include <sys/utsname.h>
32 #include <sys/mman.h>
33 #include <sys/ioctl.h>
34 
35 /* PAPI-specific includes */
36 #include "papi.h"
37 #include "papi_memory.h"
38 #include "papi_internal.h"
39 #include "papi_vector.h"
40 #include "extras.h"
41 
42 /* libpfm4 includes */
43 #include "papi_libpfm4_events.h"
44 #include "pe_libpfm4_events.h"
45 #include "perfmon/pfmlib.h"
46 #include PEINCLUDE
47 
48 /* Linux-specific includes */
49 #include "mb.h"
50 #include "linux-memory.h"
51 #include "linux-timer.h"
52 #include "linux-common.h"
53 #include "linux-context.h"
54 
55 #include "perf_event_lib.h"
56 #include "perf_helpers.h"
57 
58 /* Set to enable pre-Linux 2.6.34 perf_event workarounds */
59 /* If disabling them gets no complaints then we can remove */
60 /* These in a future version of PAPI. */
61 #define OBSOLETE_WORKAROUNDS 0
62 
63 /* Defines for ctx->state */
64 #define PERF_EVENTS_OPENED 0x01
65 #define PERF_EVENTS_RUNNING 0x02
66 
67 /* Forward declaration */
68 papi_vector_t _perf_event_vector;
69 
70 /* Globals */
71 struct native_event_table_t perf_native_event_table;
72 static int our_cidx;
73 static int exclude_guest_unsupported;
74 
75 /* The kernel developers say to never use a refresh value of 0 */
76 /* See https://lkml.org/lkml/2011/5/24/172 */
77 /* However, on some platforms (like Power) a value of 1 does not work */
78 /* We're still tracking down why this happens. */
79 
80 #if defined(__powerpc__)
81 #define PAPI_REFRESH_VALUE 0
82 #else
83 #define PAPI_REFRESH_VALUE 1
84 #endif
85 
86 static int _pe_set_domain( hwd_control_state_t *ctl, int domain);
87 
88 #if (OBSOLETE_WORKAROUNDS==1)
89 
90 /* Check for processor support */
91 /* Can be used for generic checking, though in general we only */
92 /* check for pentium4 here because support was broken for multiple */
93 /* kernel releases and the usual standard detections did not */
94 /* handle this. So we check for pentium 4 explicitly. */
95 static int
96 processor_supported(int vendor, int family) {
97 
98  /* Error out if kernel too early to support p4 */
99  if (( vendor == PAPI_VENDOR_INTEL ) && (family == 15)) {
100  if (_papi_os_info.os_version < LINUX_VERSION(2,6,35)) {
101  PAPIERROR("Pentium 4 not supported on kernels before 2.6.35");
102  return PAPI_ENOSUPP;
103  }
104  }
105  return PAPI_OK;
106 }
107 
108 #endif
109 
110 /* Fix up the config based on what CPU/Vendor we are running on */
111 static int
112 pe_vendor_fixups(papi_vector_t *vector)
113 {
114  /* powerpc */
115  /* On IBM and Power6 Machines default domain should include supervisor */
116  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) {
117  vector->cmp_info.available_domains |=
118  PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
119  if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) {
120  vector->cmp_info.default_domain =
121  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
122  }
123  }
124 
125  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_MIPS ) {
126  vector->cmp_info.available_domains |= PAPI_DOM_KERNEL;
127  }
128 
129  if ((_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL) ||
130  (_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD)) {
131  vector->cmp_info.fast_real_timer = 1;
132  }
133 
134  /* ARM */
135  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) {
136 
137  /* Some ARMv7 and earlier could not measure */
138  /* KERNEL and USER separately. */
139 
140  /* Whitelist CortexA7 and CortexA15 */
141  /* There might be more */
142 
143  if ((_papi_hwi_system_info.hw_info.cpuid_family < 8) &&
144  (_papi_hwi_system_info.hw_info.model!=0xc07) &&
145  (_papi_hwi_system_info.hw_info.model!=0xc0f)) {
146 
147  vector->cmp_info.available_domains |=
148  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
149  vector->cmp_info.default_domain =
150  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
151  }
152  }
153 
154  /* CRAY */
155  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) {
156  vector->cmp_info.available_domains |= PAPI_DOM_OTHER;
157  }
158 
159  return PAPI_OK;
160 }
161 
162 
163 
164 /******************************************************************/
165 /******** Kernel Version Dependent Routines **********************/
166 /******************************************************************/
167 
168 
169 /* PERF_FORMAT_GROUP allows reading an entire group's counts at once */
170 /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */
171 /* from attached processes. We are lazy and disable it for all cases */
172 /* commit was: 050735b08ca8a016bbace4445fa025b88fee770b */
173 
174 static int
175 bug_format_group(void) {
176 
177 
178 #if (OBSOLETE_WORKAROUNDS==1)
179  if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) return 1;
180 #endif
181 
182  /* MIPS, as of version 3.1, does not support this properly */
183  /* FIXME: is this still true? */
184 
185 #if defined(__mips__)
186  return 1;
187 #endif
188 
189  return 0;
190 
191 }
192 
193 #if (OBSOLETE_WORKAROUNDS==1)
194 
195 
196 /* There's a bug prior to Linux 2.6.33 where if you are using */
197 /* PERF_FORMAT_GROUP, the TOTAL_TIME_ENABLED and */
198 /* TOTAL_TIME_RUNNING fields will be zero unless you disable */
199 /* the counters first */
200 static int
201 bug_sync_read(void) {
202 
203  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
204 
205  return 0;
206 
207 }
208 
209 #endif
210 
211 /* Set the F_SETOWN_EX flag on the fd. */
212 /* This affects which thread an overflow signal gets sent to */
213 /* Handled in a subroutine to handle the fact that the behavior */
214 /* is dependent on kernel version. */
215 static int
216 fcntl_setown_fd(int fd) {
217 
218  int ret;
219  struct f_owner_ex fown_ex;
220 
221  /* F_SETOWN_EX is not available until 2.6.32 */
222  /* but PAPI perf_event support didn't work on 2.6.31 anyway */
223 
224  /* set ownership of the descriptor */
225  fown_ex.type = F_OWNER_TID;
226  fown_ex.pid = mygettid();
227  ret = fcntl(fd, F_SETOWN_EX, (unsigned long)&fown_ex );
228 
229  if ( ret == -1 ) {
230  PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s",
231  fd, strerror( errno ) );
232  return PAPI_ESYS;
233  }
234  return PAPI_OK;
235 }
236 
237 /* The read format on perf_event varies based on various flags that */
238 /* are passed into it. This helper avoids copying this logic */
239 /* multiple places. */
240 static unsigned int
241 get_read_format( unsigned int multiplex,
242  unsigned int inherit,
243  int format_group )
244 {
245  unsigned int format = 0;
246 
247  /* if we need read format options for multiplexing, add them now */
248  if (multiplex) {
249  format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
250  format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
251  }
252 
253  /* if our kernel supports it and we are not using inherit, */
254  /* add the group read options */
255  if ( (!bug_format_group()) && !inherit) {
256  if (format_group) {
257  format |= PERF_FORMAT_GROUP;
258  }
259  }
260 
261  SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n",
262  multiplex, inherit, format_group, format);
263 
264  return format;
265 }
266 
267 
268 /* attr.exclude_guest is enabled by default in recent libpfm4 */
269 /* however older kernels will reject events with it set */
270 /* because the reserved field is not all zeros */
271 static int
272 check_exclude_guest( void )
273 {
274  int ev_fd;
275  struct perf_event_attr attr;
276 
277  exclude_guest_unsupported=-1;
278 
279  /* First check that we can open a plain instructions event */
280  memset(&attr, 0 , sizeof(attr));
281  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
282 
283  ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 );
284  if ( ev_fd == -1 ) {
285  PAPIERROR("Couldn't open hw_instructions in exclude_guest=0 test");
286  return -1;
287  }
288  close(ev_fd);
289 
290  /* Now try again with exclude_guest */
291  memset(&attr, 0 , sizeof(attr));
292  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
293  attr.exclude_guest=1;
294 
295  ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 );
296  if ( ev_fd == -1 ) {
297  if (errno==EINVAL) {
298  exclude_guest_unsupported=1;
299  }
300  else {
301  PAPIERROR("Couldn't open hw_instructions in exclude_guest=1 test");
302  }
303  } else {
304  exclude_guest_unsupported=0;
305  close(ev_fd);
306  }
307 
308  return PAPI_OK;
309 }
310 
311 /*****************************************************************/
312 /********* End Kernel-version Dependent Routines ****************/
313 /*****************************************************************/
314 
315 /*****************************************************************/
316 /********* Begin perf_event low-level code ***********************/
317 /*****************************************************************/
318 
319 static void perf_event_dump_attr( struct perf_event_attr *hw_event,
320  pid_t pid, int cpu, int group_fd, unsigned long int flags) {
321 
322  /* Mark parameters as not used */
323  /* In the common case (no SUBDBG) the function */
324  /* compiles into an empty function and complains */
325  /* about unused variables. */
326  (void)hw_event;
327  (void)pid;
328  (void)cpu;
329  (void)group_fd;
330  (void)flags;
331 
332  SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, "
333  "group_fd: %d, flags: %lx\n",
334  hw_event, pid, cpu, group_fd, flags);
335  SUBDBG(" type: %d\n",hw_event->type);
336  SUBDBG(" size: %d\n",hw_event->size);
337  SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",
338  hw_event->config, hw_event->config);
339  SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period);
340  SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type);
341  SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format);
342  SUBDBG(" disabled: %d\n",hw_event->disabled);
343  SUBDBG(" inherit: %d\n",hw_event->inherit);
344  SUBDBG(" pinned: %d\n",hw_event->pinned);
345  SUBDBG(" exclusive: %d\n",hw_event->exclusive);
346  SUBDBG(" exclude_user: %d\n",hw_event->exclude_user);
347  SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel);
348  SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv);
349  SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle);
350  SUBDBG(" mmap: %d\n",hw_event->mmap);
351  SUBDBG(" comm: %d\n",hw_event->comm);
352  SUBDBG(" freq: %d\n",hw_event->freq);
353  SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat);
354  SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec);
355  SUBDBG(" task: %d\n",hw_event->task);
356  SUBDBG(" watermark: %d\n",hw_event->watermark);
357  SUBDBG(" precise_ip: %d\n",hw_event->precise_ip);
358  SUBDBG(" mmap_data: %d\n",hw_event->mmap_data);
359  SUBDBG(" sample_id_all: %d\n",hw_event->sample_id_all);
360  SUBDBG(" exclude_host: %d\n",hw_event->exclude_host);
361  SUBDBG(" exclude_guest: %d\n",hw_event->exclude_guest);
362  SUBDBG(" exclude_callchain_kernel: %d\n",
363  hw_event->exclude_callchain_kernel);
364  SUBDBG(" exclude_callchain_user: %d\n",
365  hw_event->exclude_callchain_user);
366  SUBDBG(" wakeup_events: %"PRIx32" (%"PRIu32")\n",
367  hw_event->wakeup_events, hw_event->wakeup_events);
368  SUBDBG(" bp_type: %"PRIx32" (%"PRIu32")\n",
369  hw_event->bp_type, hw_event->bp_type);
370  SUBDBG(" config1: %"PRIx64" (%"PRIu64")\n",
371  hw_event->config1, hw_event->config1);
372  SUBDBG(" config2: %"PRIx64" (%"PRIu64")\n",
373  hw_event->config2, hw_event->config2);
374  SUBDBG(" branch_sample_type: %"PRIx64" (%"PRIu64")\n",
375  hw_event->branch_sample_type, hw_event->branch_sample_type);
376  SUBDBG(" sample_regs_user: %"PRIx64" (%"PRIu64")\n",
377  hw_event->sample_regs_user, hw_event->sample_regs_user);
378  SUBDBG(" sample_stack_user: %"PRIx32" (%"PRIu32")\n",
379  hw_event->sample_stack_user, hw_event->sample_stack_user);
380 }
381 
382 
383 static int map_perf_event_errors_to_papi(int perf_event_error) {
384 
385  int ret;
386 
387  /* These mappings are approximate.
388  EINVAL in particular can mean lots of different things */
389  switch(perf_event_error) {
390  case EPERM:
391  case EACCES:
392  ret = PAPI_EPERM;
393  break;
394  case ENODEV:
395  case EOPNOTSUPP:
396  ret = PAPI_ENOSUPP;
397  break;
398  case ENOENT:
399  ret = PAPI_ENOEVNT;
400  break;
401  case ENOSYS:
402  case EAGAIN:
403  case EBUSY:
404  case E2BIG: /* Only happens if attr is the wrong size somehow */
405  case EBADF: /* We are attempting to group with an invalid file descriptor */
406  ret = PAPI_ESYS;
407  break;
408  case ENOMEM:
409  ret = PAPI_ENOMEM;
410  break;
411  case EMFILE: /* Out of file descriptors. Typically max out at 1024 */
412  ret = PAPI_ECOUNT;
413  break;
414  case EINVAL:
415  default:
416  ret = PAPI_EINVAL;
417  break;
418  }
419  return ret;
420 }
421 
422 
423 /* Check if the current set of options is supported by */
424 /* perf_events. */
425 /* We do this by temporarily opening an event with the */
426 /* desired options then closing it again. We use the */
427 /* PERF_COUNT_HW_INSTRUCTION event as a dummy event */
428 /* on the assumption it is available on all */
429 /* platforms. */
430 
431 static int
432 check_permissions( unsigned long tid,
433  unsigned int cpu_num,
434  unsigned int domain,
435  unsigned int granularity,
436  unsigned int multiplex,
437  unsigned int inherit )
438 {
439  int ev_fd;
440  struct perf_event_attr attr;
441 
442  long pid;
443 
444  /* clearing this will set a type of hardware and to count all domains */
445  memset(&attr, '\0', sizeof(attr));
446  attr.read_format = get_read_format(multiplex, inherit, 1);
447 
448  /* set the event id (config field) to instructions */
449  /* (an event that should always exist) */
450  /* This was cycles but that is missing on Niagara */
451  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
452 
453  /* now set up domains this event set will be counting */
454  if (!(domain & PAPI_DOM_SUPERVISOR)) {
455  attr.exclude_hv = 1;
456  }
457  if (!(domain & PAPI_DOM_USER)) {
458  attr.exclude_user = 1;
459  }
460  if (!(domain & PAPI_DOM_KERNEL)) {
461  attr.exclude_kernel = 1;
462  }
463 
464  if (granularity==PAPI_GRN_SYS) {
465  pid = -1;
466  } else {
467  pid = tid;
468  }
469 
470  SUBDBG("Calling sys_perf_event_open() from check_permissions\n");
471 
472  perf_event_dump_attr( &attr, pid, cpu_num, -1, 0 );
473 
474  ev_fd = sys_perf_event_open( &attr, pid, cpu_num, -1, 0 );
475  if ( ev_fd == -1 ) {
476  SUBDBG("sys_perf_event_open returned error. Linux says, %s",
477  strerror( errno ) );
478  return map_perf_event_errors_to_papi( errno );
479  }
480 
481  /* now close it, this was just to make sure we have permissions */
482  /* to set these options */
483  close(ev_fd);
484  return PAPI_OK;
485 }
486 
487 /* Maximum size we ever expect to read from a perf_event fd */
488 /* (this is the number of 64-bit values) */
489 /* We use this to size the read buffers */
490 /* The three is for the event count, time_enabled, and time_running */
491 /* values; the two-per-counter term is the count value and count id */
492 /* for each possible counter. */
493 #define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))
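/* Worked example of the sizing (illustrative: if PERF_EVENT_MAX_MPX_COUNTERS */
/* were 64, READ_BUFFER_SIZE would be 3 + 2*64 = 131 64-bit slots, or about */
/* 1 KB of stack per read buffer). */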
494 
495 
496 
497 /* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */
498 /* implementations (e.g. x86 before 2.6.33) which don't do a static event */
499 /* scheduability check in sys_perf_event_open. It is also needed if the */
500 /* kernel is stealing an event, such as when NMI watchdog is enabled. */
501 
502 static int
503 check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
504 {
505  int retval = 0, cnt = -1;
506  ( void ) ctx; /*unused */
507  long long papi_pe_buffer[READ_BUFFER_SIZE];
508  int i,group_leader_fd;
509 
510  /* If the kernel isn't tracking scheduability right */
511  /* Then we need to start/stop/read to force the event */
512  /* to be scheduled and see if an error condition happens. */
513 
514  /* get the proper fd to start */
515  group_leader_fd=ctl->events[idx].group_leader_fd;
516  if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
517 
518  /* start the event */
519  retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
520  if (retval == -1) {
521  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
522  return PAPI_ESYS;
523  }
524 
525  /* stop the event */
526  retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
527  if (retval == -1) {
528  PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
529  return PAPI_ESYS;
530  }
531 
532  /* See if a read returns any results */
533  cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
534  if ( cnt == -1 ) {
535  SUBDBG( "read returned an error! Should never happen.\n" );
536  return PAPI_ESYS;
537  }
538 
539  if ( cnt == 0 ) {
540  /* We read 0 bytes if we could not schedule the event */
541  /* The kernel should have detected this at open */
542  /* but various bugs (including NMI watchdog) */
543  /* result in this behavior */
544 
545  return PAPI_ECNFLCT;
546 
547  } else {
548 
549  /* Reset all of the counters (opened so far) back to zero */
550  /* from the above brief enable/disable call pair. */
551 
552  /* We have to reset all events because reset of group leader */
553  /* does not reset all. */
554  /* we assume that the events are being added one by one and that */
555  /* we do not need to reset higher events (doing so may reset ones */
556  /* that have not been initialized yet). */
557 
558  /* Note... PERF_EVENT_IOC_RESET does not reset time running */
559  /* info if multiplexing, so we should avoid coming here if */
560  /* we are multiplexing the event. */
561  for( i = 0; i < idx; i++) {
562  retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
563  if (retval == -1) {
564  PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
565  "(fd %d)failed",
566  i,ctl->num_events,idx,ctl->events[i].event_fd);
567  return PAPI_ESYS;
568  }
569  }
570  }
571  return PAPI_OK;
572 }
573 
574 
575 /* Do some extra work on a perf_event fd if we're doing sampling */
576 /* This mostly means setting up the mmap buffer. */
577 static int
578 configure_fd_for_sampling( pe_control_t *ctl, int evt_idx )
579 {
580  int ret;
581  int fd = ctl->events[evt_idx].event_fd;
582 
583  /* Register that we would like a SIGIO notification when a mmap'd page */
584  /* becomes full. */
585  ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK );
586  if ( ret ) {
587  PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) "
588  "returned error: %s", fd, strerror( errno ) );
589  return PAPI_ESYS;
590  }
591 
592  /* Set the F_SETOWN_EX flag on the fd. */
593  /* This affects which thread an overflow signal gets sent to. */
594  ret=fcntl_setown_fd(fd);
595  if (ret!=PAPI_OK) return ret;
596 
597  /* Set FD_CLOEXEC. Otherwise if we do an exec with an overflow */
598  /* running, the overflow handler will continue into the exec()'d*/
599  /* process and kill it because no signal handler is set up. */
600  ret=fcntl(fd, F_SETFD, FD_CLOEXEC);
601  if (ret) {
602  return PAPI_ESYS;
603  }
604 
605  /* when you explicitly declare that you want a particular signal, */
606  /* even if you use the default signal, the kernel will send more */
607  /* information concerning the event to the signal handler. */
608  /* */
609  /* In particular, it will send the file descriptor from which the */
610  /* event is originating which can be quite useful when monitoring */
611  /* multiple tasks from a single thread. */
612  ret = fcntl( fd, F_SETSIG, ctl->overflow_signal );
613  if ( ret == -1 ) {
614  PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
615  ctl->overflow_signal, fd,
616  strerror( errno ) );
617  return PAPI_ESYS;
618  }
619 
620  return PAPI_OK;
621 }
622 
623 static int
624 set_up_mmap( pe_control_t *ctl, int evt_idx)
625 {
626 
627  void *buf_addr;
628  int fd = ctl->events[evt_idx].event_fd;
629 
630  /* mmap() the sample buffer */
631  buf_addr = mmap( NULL,
632  ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
633  PROT_READ | PROT_WRITE,
634  MAP_SHARED,
635  fd, 0 );
636 
637  /* This may happen if we go over the limit in */
638  /* /proc/sys/kernel/perf_event_mlock_kb */
639  /* which defaults to 516k */
640  /* with regular rdpmc events on 4k page archs */
641  /* this is roughly 128 events */
642 
643  /* We shouldn't fail, just fall back to non-rdpmc */
644  /* Although not sure what happens if it's a sample */
645  /* event that fails to mmap. */
646 
647  if ( buf_addr == MAP_FAILED ) {
648  SUBDBG( "mmap(NULL,%d,%d,%d,%d,0): %s",
649  ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
650  PROT_READ | PROT_WRITE,
651  MAP_SHARED,
652  fd, strerror( errno ) );
653 
654  ctl->events[evt_idx].mmap_buf = NULL;
655 
656  /* Easier to just globally disable this, as it should */
657  /* be a fairly uncommon case hopefully. */
658  if (_perf_event_vector.cmp_info.fast_counter_read) {
659  PAPIERROR("Can't mmap, disabling fast_counter_read\n");
660  _perf_event_vector.cmp_info.fast_counter_read=0;
661  }
662  return PAPI_ESYS;
663  }
664 
665  SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr );
666 
667  /* Set up the mmap buffer and its associated helpers */
668  ctl->events[evt_idx].mmap_buf = (struct perf_counter_mmap_page *) buf_addr;
669  ctl->events[evt_idx].tail = 0;
670  ctl->events[evt_idx].mask =
671  ( ctl->events[evt_idx].nr_mmap_pages - 1 ) * getpagesize() - 1;
672 
673  return PAPI_OK;
674 }
675 
676 
677 
678 /* Open all events in the control state */
679 static int
680 open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
681 {
682 
683  int i, ret = PAPI_OK;
684  long pid;
685 
686  if (ctl->granularity==PAPI_GRN_SYS) {
687  pid = -1;
688  }
689  else {
690  pid = ctl->tid;
691  }
692 
693  for( i = 0; i < ctl->num_events; i++ ) {
694 
695  ctl->events[i].event_opened=0;
696 
697  /* set up the attr structure. */
698  /* We don't set up all fields here */
699  /* as some have already been set up previously. */
700 
701  /* Handle the broken exclude_guest problem */
702  /* libpfm4 sets this by default (PEBS events depend on it) */
703  /* but on older kernels that don't know about exclude_guest */
704  /* perf_event_open() will error out as a "reserved" */
705  /* unknown bit is set to 1. */
706  /* Do we need to also watch for exclude_host, exclude_idle, */
707  /* exclude_callchain*? */
708  if ((ctl->events[i].attr.exclude_guest) &&
709  (exclude_guest_unsupported)) {
710  SUBDBG("Disabling exclude_guest in event %d\n",i);
711  ctl->events[i].attr.exclude_guest=0;
712  }
713 
714  /* group leader (event 0) is special */
715  /* If we're multiplexed, everyone is a group leader */
716  if (( i == 0 ) || (ctl->multiplexed)) {
717  ctl->events[i].attr.pinned = !ctl->multiplexed;
718  ctl->events[i].attr.disabled = 1;
719  ctl->events[i].group_leader_fd=-1;
720  ctl->events[i].attr.read_format = get_read_format(
721  ctl->multiplexed,
722  ctl->inherit,
723  !ctl->multiplexed );
724  } else {
725  ctl->events[i].attr.pinned=0;
726  ctl->events[i].attr.disabled = 0;
727  ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
728  ctl->events[i].attr.read_format = get_read_format(
729  ctl->multiplexed,
730  ctl->inherit,
731  0 );
732  }
733 
734  /* try to open */
735  perf_event_dump_attr(
736  &ctl->events[i].attr,
737  pid,
738  ctl->events[i].cpu,
739  ctl->events[i].group_leader_fd,
740  0 /* flags */ );
741 
742  ctl->events[i].event_fd = sys_perf_event_open(
743  &ctl->events[i].attr,
744  pid,
745  ctl->events[i].cpu,
746  ctl->events[i].group_leader_fd,
747  0 /* flags */ );
748 
749  /* Try to match Linux errors to PAPI errors */
750  if ( ctl->events[i].event_fd == -1 ) {
751  SUBDBG("sys_perf_event_open returned error "
752  "on event #%d. Error: %s\n",
753  i, strerror( errno ) );
754  ret = map_perf_event_errors_to_papi( errno );
755 
756  goto open_pe_cleanup;
757  }
758 
759  SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
760  " group_leader/fd: %d, event_fd: %d,"
761  " read_format: %"PRIu64"\n",
762  pid, ctl->events[i].cpu,
763  ctl->events[i].group_leader_fd,
764  ctl->events[i].event_fd,
765  ctl->events[i].attr.read_format);
766 
767 
768  /* in many situations the kernel will indicate we opened fine */
769  /* yet things will fail later. So we need to double check */
770  /* we actually can use the events we've set up. */
771 
772  /* This is not necessary if we are multiplexing, and in fact */
773  /* we cannot do this properly if multiplexed because */
774  /* PERF_EVENT_IOC_RESET does not reset the time running info */
775  if (!ctl->multiplexed) {
776  ret = check_scheduability( ctx, ctl, i );
777 
778  if ( ret != PAPI_OK ) {
779  /* the last event did open, so we need to */
780  /* bump the counter before doing the cleanup */
781  i++;
782  goto open_pe_cleanup;
783  }
784  }
785  ctl->events[i].event_opened=1;
786  }
787 
788  /* Now that we've successfully opened all of the events, do whatever */
789  /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
790  /* and so on. */
791 
792 
793  /* Make things easier and give each event a mmap() buffer */
794  /* Keeping separate tracking for rdpmc vs regular events */
795  /* would be a pain. Also perf always gives every event a */
796  /* mmap buffer. */
797 
798  for ( i = 0; i < ctl->num_events; i++ ) {
799 
800  /* Can't mmap() inherited events :( */
801  if (ctl->inherit) {
802  ctl->events[i].nr_mmap_pages = 0;
803  ctl->events[i].mmap_buf = NULL;
804  }
805  else {
806  /* Just a guess at how many pages would make this */
807  /* relatively efficient. */
808  /* Note that it's "1 +" because of the need for a */
809  /* control page, and the number following the "+" */
810  /* must be a power of 2 (1, 2, 4, 8, 16, etc.) or zero. */
811  /* This is required to optimize dealing with */
812  /* circular buffer wrapping of the mapped pages. */
813  if (ctl->events[i].sampling) {
814  ctl->events[i].nr_mmap_pages = 1 + 2;
815  }
816  else if (_perf_event_vector.cmp_info.fast_counter_read) {
817  ctl->events[i].nr_mmap_pages = 1;
818  }
819  else {
820  ctl->events[i].nr_mmap_pages = 0;
821  }
822 
823  /* Set up the MMAP sample pages */
824  if (ctl->events[i].nr_mmap_pages) {
825  set_up_mmap(ctl,i);
826  } else {
827  ctl->events[i].mmap_buf = NULL;
828  }
829  }
830  }
831 
832  for ( i = 0; i < ctl->num_events; i++ ) {
833 
834  /* If sampling is enabled, hook up signal handler */
835  if (ctl->events[i].attr.sample_period) {
836 
837  ret = configure_fd_for_sampling( ctl, i );
838  if ( ret != PAPI_OK ) {
839  /* We failed, and all of the fds are open */
840  /* so we need to clean up all of them */
841  i = ctl->num_events;
842  goto open_pe_cleanup;
843  }
844  }
845  }
846 
847  /* Set num_evts only if completely successful */
848  ctx->state |= PERF_EVENTS_OPENED;
849 
850  return PAPI_OK;
851 
852 open_pe_cleanup:
853  /* We encountered an error, close up the fds we successfully opened. */
854  /* We go backward in an attempt to close group leaders last, although */
855  /* that's probably not strictly necessary. */
856  while ( i > 0 ) {
857  i--;
858  if (ctl->events[i].event_fd>=0) {
859  close( ctl->events[i].event_fd );
860  ctl->events[i].event_opened=0;
861  }
862  }
863 
864  return ret;
865 }
866 
867 /* TODO: make code clearer -- vmw */
868 static int
869 close_event( pe_event_info_t *event )
870 {
871  int munmap_error=0,close_error=0;
872 
873  if ( event->mmap_buf ) {
874  if (event->nr_mmap_pages==0) {
875  PAPIERROR("munmap and num pages is zero");
876  }
877  if ( munmap ( event->mmap_buf,
878  event->nr_mmap_pages * getpagesize() ) ) {
879  PAPIERROR( "munmap of fd = %d returned error: %s",
880  event->event_fd,
881  strerror( errno ) );
882  event->mmap_buf=NULL;
883  munmap_error=1;
884  }
885  }
886  if ( close( event->event_fd ) ) {
887  PAPIERROR( "close of fd = %d returned error: %s",
888  event->event_fd, strerror( errno ) );
889  close_error=1;
890  }
891 
892  event->event_opened=0;
893 
894  if ((close_error || munmap_error)) {
895  return PAPI_ESYS;
896  }
897 
898  return 0;
899 }
900 
901 /* Close all of the opened events */
902 static int
903 close_pe_events( pe_context_t *ctx, pe_control_t *ctl )
904 {
905  int i,result;
906  int num_closed=0;
907  int events_not_opened=0;
908 
909  /* should this be a more serious error? */
910  if ( ctx->state & PERF_EVENTS_RUNNING ) {
911  SUBDBG("Closing without stopping first\n");
912  }
913 
914  /* Close child events first */
915  /* Is that necessary? -- vmw */
916  for( i=0; i<ctl->num_events; i++ ) {
917  if (ctl->events[i].event_opened) {
918  if (ctl->events[i].group_leader_fd!=-1) {
919  result=close_event(&ctl->events[i]);
920  if (result!=0) return result;
921  else num_closed++;
922  }
923  }
924  else {
925  events_not_opened++;
926  }
927  }
928 
929  /* Close the group leaders last */
930  for( i=0; i<ctl->num_events; i++ ) {
931  if (ctl->events[i].event_opened) {
932  if (ctl->events[i].group_leader_fd==-1) {
933  result=close_event(&ctl->events[i]);
934  if (result!=0) return result;
935  else num_closed++;
936  }
937  }
938  }
939 
940  if (ctl->num_events!=num_closed) {
941  if (ctl->num_events!=(num_closed+events_not_opened)) {
942  PAPIERROR("Didn't close all events: "
943  "Closed %d Not Opened: %d Expected %d",
944  num_closed,events_not_opened,ctl->num_events);
945  return PAPI_EBUG;
946  }
947  }
948 
949  ctl->num_events=0;
950 
951  ctx->state &= ~PERF_EVENTS_OPENED;
952 
953  return PAPI_OK;
954 }
955 
956 
957 /********************************************************************/
958 /********************************************************************/
959 /* Functions that are exported via the component interface */
960 /********************************************************************/
961 /********************************************************************/
962 
963 /********************* DOMAIN RELATED *******************************/
964 
965 
966 /* set the domain. */
967 /* perf_events allows per-event control of this, */
968 /* papi allows it to be set at the event level or at the event set level. */
969 /* this will set the event set level domain values */
970 /* but they only get used if no event level domain mask (u= or k=) */
971 /* was specified. */
972 static int
973 _pe_set_domain( hwd_control_state_t *ctl, int domain)
974 {
975  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
976 
977  SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain,domain);
978  pe_ctl->domain = domain;
979  return PAPI_OK;
980 }
981 
982 
983 /********************* THREAD RELATED *******************************/
984 
985 
986 /* Shutdown a thread */
987 static int
988 _pe_shutdown_thread( hwd_context_t *ctx )
989 {
990  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
991 
992  pe_ctx->initialized=0;
993 
994  return PAPI_OK;
995 }
996 
997 /* Initialize a thread */
998 static int
999 _pe_init_thread( hwd_context_t *hwd_ctx )
1000 {
1001 
1002  pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;
1003 
1004  /* clear the context structure and mark as initialized */
1005  memset( pe_ctx, 0, sizeof ( pe_context_t ) );
1006  pe_ctx->initialized=1;
1007  pe_ctx->event_table=&perf_native_event_table;
1008  pe_ctx->cidx=our_cidx;
1009 
1010  return PAPI_OK;
1011 }
1012 
1013 
1014 
1015 /**************************** COUNTER RELATED *******************/
1016 
1017 
1018 /* reset the hardware counters */
1019 /* Note: PAPI_reset() does not necessarily call this */
1020 /* unless the events are actually running. */
1021 static int
1022 _pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
1023 {
1024  int i, ret;
1025  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1026 
1027  ( void ) ctx; /*unused */
1028 
1029  /* We need to reset all of the events, not just the group leaders */
1030  for( i = 0; i < pe_ctl->num_events; i++ ) {
1031  ret = ioctl( pe_ctl->events[i].event_fd,
1032  PERF_EVENT_IOC_RESET, NULL );
1033  if ( ret == -1 ) {
1034  PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
1035  "returned error, Linux says: %s",
1036  pe_ctl->events[i].event_fd,
1037  strerror( errno ) );
1038  return PAPI_ESYS;
1039  }
1040  }
1041 
1042  return PAPI_OK;
1043 }
1044 
1045 
1046 /* write (set) the hardware counters */
1047 /* Currently we do not support this. */
1048 static int
1049 _pe_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
1050  long long *from )
1051 {
1052  ( void ) ctx; /*unused */
1053  ( void ) ctl; /*unused */
1054  ( void ) from; /*unused */
1055  /*
1056  * Counters cannot be written. Do we need to virtualize the
1057  * counters so that they can be written, or perhaps modify code so that
1058  * they can be written? FIXME ?
1059  */
1060 
1061  return PAPI_ENOSUPP;
1062 }
1063 
1064 /*
1065  * perf_event provides a complicated read interface.
1066  * the info returned by read() varies depending on whether
1067  * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
1068  * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set
1069  *
1070  * To simplify things we just always ask for everything. This might
1071  * lead to overhead when reading more than we need, but it makes the
1072  * read code a lot simpler than the original implementation we had here.
1073  *
1074  * For more info on the layout see include/uapi/linux/perf_event.h
1075  *
1076  */
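/* Sketch of the two layouts this file ends up parsing (abbreviated from
 * the kernel's include/uapi/linux/perf_event.h):
 *
 *   multiplexed case (PERF_FORMAT_TOTAL_TIME_ENABLED|RUNNING, no GROUP):
 *       u64 value; u64 time_enabled; u64 time_running;
 *
 *   grouped case (PERF_FORMAT_GROUP, read from the group leader's fd):
 *       u64 nr; struct { u64 value; } cnt[nr];
 */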
1077 
1078 
1079 /* When we read with rdpmc, we must read each counter individually */
1080 /* Because of this we don't need separate multiplexing support */
1081 /* This is all handled by mmap_read_self() */
1082 static int
1083 _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
1084  long long **events, int flags )
1085 {
1086  SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
1087  ctx, ctl, events, flags);
1088 
1089  ( void ) flags; /*unused */
1090  ( void ) ctx; /*unused */
1091  int i;
1092  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1093  unsigned long long count, enabled, running, adjusted;
1094 
1095  /* we must read each counter individually */
1096  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1097 
1098  count = mmap_read_self(pe_ctl->events[i].mmap_buf,
1099  &enabled,&running);
1100 
1101  /* TODO: error checking? */
1102 
1103  /* Handle multiplexing case */
1104  if (enabled!=running) {
1105  adjusted = (enabled * 128LL) / running;
1106  adjusted = adjusted * count;
1107  adjusted = adjusted / 128LL;
1108  count = adjusted;
1109  }
1110 
1111  pe_ctl->counts[i] = count;
1112  }
1113  /* point PAPI to the values we read */
1114  *events = pe_ctl->counts;
1115 
1116  SUBDBG("EXIT: *events: %p\n", *events);
1117 
1118  return PAPI_OK;
1119 }
1120 
1121 
1122 static int
1123 _pe_read_multiplexed( pe_control_t *pe_ctl )
1124 {
1125  int i,ret=-1;
1126  long long papi_pe_buffer[READ_BUFFER_SIZE];
1127  long long tot_time_running, tot_time_enabled, scale;
1128 
1129  /* perf_event does not support FORMAT_GROUP on multiplex */
1130  /* so we have to handle separate events when multiplexing */
1131 
1132  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1133 
1134  ret = read( pe_ctl->events[i].event_fd,
1135  papi_pe_buffer,
1136  sizeof ( papi_pe_buffer ) );
1137  if ( ret == -1 ) {
1138  PAPIERROR("read returned an error: %s",
1139  strerror( errno ));
1140  return PAPI_ESYS;
1141  }
1142 
1143  /* We should read 3 64-bit values from the counter */
1144  if (ret<(signed)(3*sizeof(long long))) {
1145  PAPIERROR("Error! short read");
1146  return PAPI_ESYS;
1147  }
1148 
1149  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1150  pe_ctl->events[i].event_fd,
1151  (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
1152  SUBDBG("read: %lld %lld %lld\n",
1153  papi_pe_buffer[0],
1154  papi_pe_buffer[1],
1155  papi_pe_buffer[2]);
1156 
1157  tot_time_enabled = papi_pe_buffer[1];
1158  tot_time_running = papi_pe_buffer[2];
1159 
1160  SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
1161  "tot_time_enabled %lld) / "
1162  "tot_time_running %lld\n",
1163  i, 0,papi_pe_buffer[0],
1164  tot_time_enabled,tot_time_running);
1165 
1166  if (tot_time_running == tot_time_enabled) {
1167  /* No scaling needed */
1168  pe_ctl->counts[i] = papi_pe_buffer[0];
1169  } else if (tot_time_running && tot_time_enabled) {
1170  /* Scale to give better results */
1171  /* avoid truncation. */
1172  /* Why use 100? Would 128 be faster? */
1173  scale = (tot_time_enabled * 100LL) / tot_time_running;
1174  scale = scale * papi_pe_buffer[0];
1175  scale = scale / 100LL;
1176  pe_ctl->counts[i] = scale;
1177  } else {
1178  /* This should not happen, but Phil reports it sometimes does. */
1179  SUBDBG("perf_event kernel bug(?) count, enabled, "
1180  "running: %lld, %lld, %lld\n",
1181  papi_pe_buffer[0],tot_time_enabled,
1182  tot_time_running);
1183 
1184  pe_ctl->counts[i] = papi_pe_buffer[0];
1185  }
1186  }
1187  return PAPI_OK;
1188 }
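/* Worked example of the scaling above (illustrative numbers): with
 * time_enabled = 100, time_running = 50, and a raw count of 2000,
 * scale = (100 * 100) / 50 = 200 and the reported count becomes
 * 2000 * 200 / 100 = 4000, extrapolating the count to the full
 * enabled window. The rdpmc read path earlier does the same
 * arithmetic with 128 in place of 100. */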
1189 
1190 /* For cases where we can't group counters together */
1191 /* But must read them out individually */
1192 /* This includes when INHERIT is set, as well as various bugs */
1193 
1194 static int
1195 _pe_read_nogroup( pe_control_t *pe_ctl ) {
1196 
1197  int i,ret=-1;
1198  long long papi_pe_buffer[READ_BUFFER_SIZE];
1199 
1200  /* we must read each counter individually */
1201  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1202  ret = read( pe_ctl->events[i].event_fd,
1203  papi_pe_buffer,
1204  sizeof ( papi_pe_buffer ) );
1205  if ( ret == -1 ) {
1206  PAPIERROR("read returned an error: %s",
1207  strerror( errno ));
1208  return PAPI_ESYS;
1209  }
1210 
1211  /* we should read one 64-bit value from each counter */
1212  if (ret!=sizeof(long long)) {
1213  PAPIERROR("Error! short read");
1214  PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d",
1215  pe_ctl->events[i].event_fd,
1216  (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
1217  return PAPI_ESYS;
1218  }
1219 
1220  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1221  pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
1222  pe_ctl->events[i].cpu, ret);
1223  SUBDBG("read: %lld\n",papi_pe_buffer[0]);
1224 
1225  pe_ctl->counts[i] = papi_pe_buffer[0];
1226  }
1227 
1228  return PAPI_OK;
1229 
1230 }
1231 
1232 static int
1233 _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
1234  long long **events, int flags )
1235 {
1236  SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
1237  ctx, ctl, events, flags);
1238 
1239  ( void ) flags; /*unused */
1240  ( void ) ctx; /*unused */
1241  int i, j, ret = -1;
1242  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1243  long long papi_pe_buffer[READ_BUFFER_SIZE];
1244 
1245  /* Handle fast case */
1246  if ((_perf_event_vector.cmp_info.fast_counter_read) && (!pe_ctl->inherit)) {
1247  return _pe_rdpmc_read( ctx, ctl, events, flags);
1248  }
1249 
1250  /* Handle case where we are multiplexing */
1251  if (pe_ctl->multiplexed) {
1252  _pe_read_multiplexed(pe_ctl);
1253  }
1254 
1255  /* Handle cases where we cannot use FORMAT GROUP */
1256  else if (bug_format_group() || pe_ctl->inherit) {
1257  _pe_read_nogroup(pe_ctl);
1258  }
1259 
1260  /* Handle common case where we are using FORMAT_GROUP */
1261  /* We assume only one group leader, in position 0 */
1262 
1263  /* By reading the leader file descriptor, we get a series */
1264  /* of 64-bit values. The first is the total number of */
1265  /* events, followed by the counts for them. */
1266 
1267  else {
1268  if (pe_ctl->events[0].group_leader_fd!=-1) {
1269  PAPIERROR("Was expecting group leader");
1270  }
1271 
1272  ret = read( pe_ctl->events[0].event_fd,
1273  papi_pe_buffer,
1274  sizeof ( papi_pe_buffer ) );
1275 
1276  if ( ret == -1 ) {
1277  PAPIERROR("read returned an error: %s",
1278  strerror( errno ));
1279  return PAPI_ESYS;
1280  }
1281 
1282  /* we read 1 64-bit value (number of events) then */
1283  /* num_events more 64-bit values that hold the counts */
1284  if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
1285  PAPIERROR("Error! short read");
1286  return PAPI_ESYS;
1287  }
1288 
1289  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1290  pe_ctl->events[0].event_fd,
1291  (long)pe_ctl->tid, pe_ctl->events[0].cpu, ret);
1292 
1293  for(j=0;j<ret/8;j++) {
1294  SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
1295  }
1296 
1297  /* Make sure the kernel agrees with how many events we have */
1298  if (papi_pe_buffer[0]!=pe_ctl->num_events) {
1299  PAPIERROR("Error! Wrong number of events");
1300  return PAPI_ESYS;
1301  }
1302 
1303  /* put the count values in their proper location */
1304  for(i=0;i<pe_ctl->num_events;i++) {
1305  pe_ctl->counts[i] = papi_pe_buffer[1+i];
1306  }
1307  }
1308 
1309  /* point PAPI to the values we read */
1310  *events = pe_ctl->counts;
1311 
1312  SUBDBG("EXIT: *events: %p\n", *events);
1313 
1314  return PAPI_OK;
1315 }
1316 
1317 #if (OBSOLETE_WORKAROUNDS==1)
1318 /* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and TOTAL_TIME_RUNNING */
1319 /* fields are always 0 unless the counter is disabled. So if we are on */
1320 /* one of these kernels, then we must disable events before reading. */
1321 /* Elsewhere though we disable multiplexing on kernels before 2.6.34 */
1322 /* so maybe this isn't even necessary. */
1323 static int
1324 _pe_read_bug_sync( hwd_context_t *ctx, hwd_control_state_t *ctl,
1325  long long **events, int flags )
1326 {
1327 
1328  ( void ) flags; /*unused */
1329  int i, ret = -1;
1330  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1331  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1332  int result;
1333 
1334  if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
1335  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1336  /* disable only the group leaders */
1337  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1338  ret = ioctl( pe_ctl->events[i].event_fd,
1339  PERF_EVENT_IOC_DISABLE, NULL );
1340  if ( ret == -1 ) {
1341  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) "
1342  "returned an error: %s", strerror( errno ));
1343  return PAPI_ESYS;
1344  }
1345  }
1346  }
1347  }
1348 
1349  result=_pe_read( ctx, ctl, events, flags );
1350 
1351  /* If we disabled the counters due to bug_sync_read(), */
1352  /* then we need to re-enable them now. */
1353 
1354  if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
1355  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1356  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1357  /* this should refresh any overflow counters too */
1358  ret = ioctl( pe_ctl->events[i].event_fd,
1359  PERF_EVENT_IOC_ENABLE, NULL );
1360  if ( ret == -1 ) {
1361  /* Should never happen */
1362  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) returned an error: %s",
1363  strerror( errno ));
1364  return PAPI_ESYS;
1365  }
1366  }
1367  }
1368  }
1369 
1370  return result;
1371 }
1372 
1373 #endif
1374 
1375 /* Start counting events */
1376 static int
1377 _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
1378 {
1379  int ret;
1380  int i;
1381  int did_something = 0;
1382  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1383  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1384 
1385  /* Reset the counters first. Is this necessary? */
1386  ret = _pe_reset( pe_ctx, pe_ctl );
1387  if ( ret ) {
1388  return ret;
1389  }
1390 
1391  /* Enable all of the group leaders */
1392  /* All group leaders have a group_leader_fd of -1 */
1393  for( i = 0; i < pe_ctl->num_events; i++ ) {
1394  if (pe_ctl->events[i].group_leader_fd == -1) {
1395  SUBDBG("ioctl(enable): fd: %d\n",
1396  pe_ctl->events[i].event_fd);
1397  ret=ioctl( pe_ctl->events[i].event_fd,
1398  PERF_EVENT_IOC_ENABLE, NULL) ;
1399 
1400  /* ioctls always return -1 on failure */
1401  if (ret == -1) {
1402  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
1403  return PAPI_ESYS;
1404  }
1405 
1406  did_something++;
1407  }
1408  }
1409 
1410  if (!did_something) {
1411  PAPIERROR("Did not enable any counters");
1412  return PAPI_EBUG;
1413  }
1414 
1415  pe_ctx->state |= PERF_EVENTS_RUNNING;
1416 
1417  return PAPI_OK;
1418 
1419 }
1420 
1421 /* Stop all of the counters */
1422 static int
1423 _pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
1424 {
1425  SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl);
1426 
1427  int ret;
1428  int i;
1429  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1430  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1431 
1432  /* Just disable the group leaders */
1433  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1434  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1435  ret=ioctl( pe_ctl->events[i].event_fd,
1436  PERF_EVENT_IOC_DISABLE, NULL);
1437  if ( ret == -1 ) {
1438  PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
1439  "returned error, Linux says: %s",
1440  pe_ctl->events[i].event_fd, strerror( errno ) );
1441  return PAPI_EBUG;
1442  }
1443  }
1444  }
1445 
1446  pe_ctx->state &= ~PERF_EVENTS_RUNNING;
1447 
1448  SUBDBG( "EXIT:\n");
1449 
1450  return PAPI_OK;
1451 }
1452 
1453 
1454 
1455 
1456 
1457 /*********************** CONTROL STATE RELATED *******************/
1458 
1459 
1460 /* This function clears the current contents of the control structure and
1461  updates it with whatever resources are allocated for all the native events
1462  in the native info structure array. */
1463 
1464 static int
1465 _pe_update_control_state( hwd_control_state_t *ctl,
1466  NativeInfo_t *native,
1467  int count, hwd_context_t *ctx )
1468 {
1469  SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n",
1470  ctl, native, count, ctx);
1471  int i;
1472  int j;
1473  int ret;
1474  int skipped_events=0;
1475  struct native_event_t *ntv_evt;
1476  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1477  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1478 
1479  /* close all of the existing fds and start over again */
1480  /* In theory we could have finer-grained control and know if */
1481  /* things were changed, but it's easier to tear things down and rebuild. */
1482  close_pe_events( pe_ctx, pe_ctl );
1483 
1484  /* Calling with count==0 should be OK, it's how things are deallocated */
1485  /* when an eventset is destroyed. */
1486  if ( count == 0 ) {
1487  SUBDBG( "EXIT: Called with count == 0\n" );
1488  return PAPI_OK;
1489  }
1490 
1491  /* set up all the events */
1492  for( i = 0; i < count; i++ ) {
1493  if ( native ) {
1494  /* get the native event pointer used for this papi event */
1495  int ntv_idx = _papi_hwi_get_ntv_idx((unsigned)(native[i].ni_papi_code));
1496  if (ntv_idx < -1) {
1497  SUBDBG("papi_event_code: %#x known by papi but not by the component\n", native[i].ni_papi_code);
1498  skipped_events++; continue; /* count it as skipped */
1499  }
1500  /* if native index is -1, then we have an event without a mask and need to find the right native index to use */
1501  if (ntv_idx == -1) {
1502  /* find the native event index we want by matching for the right papi event code */
1503  for (j=0 ; j<pe_ctx->event_table->num_native_events ; j++) {
1504  if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) {
1505  ntv_idx = j;
1506  }
1507  }
1508  }
1509 
1510  /* if native index is still negative, we did not find event we wanted so just return error */
1511  if (ntv_idx < 0) {
1512  SUBDBG("papi_event_code: %#x not found in native event tables\n", native[i].ni_papi_code);
1513  skipped_events++; continue; /* count it as skipped */
1514  }
1515 
1516  /* this native index is positive so there was a mask with the event, the ntv_idx identifies which native event to use */
1517  ntv_evt = (struct native_event_t *)(&(pe_ctx->event_table->native_events[ntv_idx]));
1518  SUBDBG("ntv_evt: %p\n", ntv_evt);
1519 
1520  SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n", i, pe_ctx->event_table->num_native_events);
1521 
1522  /* Move this event's hardware config values and other attributes to the perf_events attribute structure */
1523  memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr, sizeof(perf_event_attr_t));
1524 
1525  /* may need to update the attribute structure with information from event set level domain settings (values set by PAPI_set_domain) */
1526  /* only done if the event mask which controls each counting domain was not provided */
1527 
1528  /* get pointer to allocated name, will be NULL when adding preset events to event set */
1529  char *aName = ntv_evt->allocated_name;
1530  if ((aName == NULL) || (strstr(aName, ":u=") == NULL)) {
1531  SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_user, !(pe_ctl->domain & PAPI_DOM_USER));
1532  pe_ctl->events[i].attr.exclude_user = !(pe_ctl->domain & PAPI_DOM_USER);
1533  }
1534  if ((aName == NULL) || (strstr(aName, ":k=") == NULL)) {
1535  SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_kernel, !(pe_ctl->domain & PAPI_DOM_KERNEL));
1536  pe_ctl->events[i].attr.exclude_kernel = !(pe_ctl->domain & PAPI_DOM_KERNEL);
1537  }
1538 
1539  // libpfm4 supports mh (monitor host) and mg (monitor guest) event masks
1540  // perf_events supports exclude_hv and exclude_idle attributes
1541  // PAPI_set_domain supports PAPI_DOM_SUPERVISOR and PAPI_DOM_OTHER domain attributes
1542  // not sure how these perf_event attributes, and PAPI domain attributes relate to each other
1543  // if that can be figured out then there should probably be code here to set some perf_events attributes based on what was set in a PAPI_set_domain call
1544  // the code sample below is one possibility
1545 // if (strstr(ntv_evt->allocated_name, ":mg=") == NULL) {
1546 // SUBDBG("set exclude_hv attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_hv, !(pe_ctl->domain & PAPI_DOM_SUPERVISOR));
1547 // pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
1548 // }
1549 
1550 
1551  // set the cpu number provided with an event mask if there was one (will be -1 if mask not provided)
1552  pe_ctl->events[i].cpu = ntv_evt->cpu;
1553  // if cpu event mask not provided, then set the cpu to use to what may have been set on call to PAPI_set_opt (will still be -1 if not called)
1554  if (pe_ctl->events[i].cpu == -1) {
1555  pe_ctl->events[i].cpu = pe_ctl->cpu;
1556  }
1557  } else {
1558  /* This case happens when called from _pe_set_overflow and _pe_ctl */
1559  /* Those callers put things directly into the pe_ctl structure so it is already set for the open call */
1560  }
1561 
1562  /* Copy the inherit flag into the attribute block that will be passed to the kernel */
1563  pe_ctl->events[i].attr.inherit = pe_ctl->inherit;
1564 
1565  /* Set the position in the native structure */
1566  /* We just set up events linearly */
1567  if ( native ) {
1568  native[i].ni_position = i;
1569  SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n",
1570  i, &(native[i]), native[i].ni_papi_code, native[i].ni_event, native[i].ni_position, native[i].ni_owners);
1571  }
1572  }
1573 
1574  if (count <= skipped_events) {
1575  SUBDBG("EXIT: No events to count, they all contained invalid umasks\n");
1576  return PAPI_ENOEVNT;
1577  }
1578 
1579  pe_ctl->num_events = count - skipped_events;
1580 
1581  /* actually open the events */
1582  ret = open_pe_events( pe_ctx, pe_ctl );
1583  if ( ret != PAPI_OK ) {
1584  SUBDBG("EXIT: open_pe_events returned: %d\n", ret);
1585  /* Restore values ? */
1586  return ret;
1587  }
1588 
1589  SUBDBG( "EXIT: PAPI_OK\n" );
1590  return PAPI_OK;
1591 }
1592 
1593 /* Set various options on a control state */
1594 static int
1595 _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
1596 {
1597  int ret;
1598  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1599  pe_control_t *pe_ctl = NULL;
1600 
1601  switch ( code ) {
1602  case PAPI_MULTIPLEX:
1603  pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
1604  ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
1605  pe_ctl->granularity,
1606  1, pe_ctl->inherit );
1607  if (ret != PAPI_OK) {
1608  return ret;
1609  }
1610 
1611  /* looks like we are allowed, so set multiplexed attribute */
1612  pe_ctl->multiplexed = 1;
1613  ret = _pe_update_control_state( pe_ctl, NULL,
1614  pe_ctl->num_events, pe_ctx );
1615  if (ret != PAPI_OK) {
1616  pe_ctl->multiplexed = 0;
1617  }
1618  return ret;
1619 
1620  case PAPI_ATTACH:
1621  pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
1622  ret = check_permissions( option->attach.tid, pe_ctl->cpu,
1623  pe_ctl->domain, pe_ctl->granularity,
1624  pe_ctl->multiplexed,
1625  pe_ctl->inherit );
1626  if (ret != PAPI_OK) {
1627  return ret;
1628  }
1629 
1630  pe_ctl->tid = option->attach.tid;
1631 
1632  /* If events have already been added, something may */
1633  /* have been done to the kernel, so update */
1634  ret =_pe_update_control_state( pe_ctl, NULL,
1635  pe_ctl->num_events, pe_ctx);
1636 
1637  return ret;
1638 
1639  case PAPI_DETACH:
1640  pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );
1641 
1642  pe_ctl->tid = 0;
1643  return PAPI_OK;
1644 
1645  case PAPI_CPU_ATTACH:
1646  pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
1647  ret = check_permissions( pe_ctl->tid, option->cpu.cpu_num,
1648  pe_ctl->domain, pe_ctl->granularity,
1649  pe_ctl->multiplexed,
1650  pe_ctl->inherit );
1651  if (ret != PAPI_OK) {
1652  return ret;
1653  }
1654  /* looks like we are allowed so set cpu number */
1655 
1656  /* this tells the kernel not to count for a thread */
1657  /* should we warn if we try to set both? perf_event */
1658  /* will reject it. */
1659  pe_ctl->tid = -1;
1660 
1661  pe_ctl->cpu = option->cpu.cpu_num;
1662 
1663  return PAPI_OK;
1664 
1665  case PAPI_DOMAIN:
1666  pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
1667  ret = check_permissions( pe_ctl->tid, pe_ctl->cpu,
1668  option->domain.domain,
1669  pe_ctl->granularity,
1670  pe_ctl->multiplexed,
1671  pe_ctl->inherit );
1672  if (ret != PAPI_OK) {
1673  return ret;
1674  }
1675  /* looks like we are allowed, so set event set level counting domains */
1676  pe_ctl->domain = option->domain.domain;
1677  return PAPI_OK;
1678 
1679  case PAPI_GRANUL:
1680  pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );
1681 
1682  /* FIXME: we really don't support this yet */
1683 
1684  switch ( option->granularity.granularity ) {
1685  case PAPI_GRN_PROCG:
1686  case PAPI_GRN_SYS_CPU:
1687  case PAPI_GRN_PROC:
1688  return PAPI_ECMP;
1689 
1690  /* Currently we only support thread and CPU granularity */
1691  case PAPI_GRN_SYS:
1692  pe_ctl->granularity=PAPI_GRN_SYS;
1693  pe_ctl->cpu=_papi_getcpu();
1694  break;
1695 
1696  case PAPI_GRN_THR:
1697  pe_ctl->granularity=PAPI_GRN_THR;
1698  break;
1699 
1700 
1701  default:
1702  return PAPI_EINVAL;
1703  }
1704  return PAPI_OK;
1705 
1706  case PAPI_INHERIT:
1707  pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
1708  ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
1709  pe_ctl->granularity, pe_ctl->multiplexed,
1710  option->inherit.inherit );
1711  if (ret != PAPI_OK) {
1712  return ret;
1713  }
1714  /* looks like we are allowed, so set the requested inheritance */
1715  if (option->inherit.inherit) {
1716  /* children will inherit counters */
1717  pe_ctl->inherit = 1;
1718  } else {
1719  /* children won't inherit counters */
1720  pe_ctl->inherit = 0;
1721  }
1722  return PAPI_OK;
1723 
1724  case PAPI_DATA_ADDRESS:
1725  return PAPI_ENOSUPP;
1726 #if 0
1727  pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
1728  ret = set_default_domain( pe_ctl, option->address_range.domain );
1729  if ( ret != PAPI_OK ) {
1730  return ret;
1731  }
1732  set_drange( pe_ctx, pe_ctl, option );
1733  return PAPI_OK;
1734 #endif
1735  case PAPI_INSTR_ADDRESS:
1736  return PAPI_ENOSUPP;
1737 #if 0
1738  pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
1739  ret = set_default_domain( pe_ctl, option->address_range.domain );
1740  if ( ret != PAPI_OK ) {
1741  return ret;
1742  }
1743  set_irange( pe_ctx, pe_ctl, option );
1744  return PAPI_OK;
1745 #endif
1746 
1747  case PAPI_DEF_ITIMER:
1748  /* What should we be checking for here? */
1749  /* This seems like it should be OS-specific not component */
1750  /* specific. */
1751 
1752  return PAPI_OK;
1753 
1754  case PAPI_DEF_MPX_NS:
1755  /* Defining a given ns per set is not currently supported */
1756  return PAPI_ENOSUPP;
1757 
1758  case PAPI_DEF_ITIMER_NS:
1759  /* We don't support this... */
1760  return PAPI_OK;
1761 
1762  default:
1763  return PAPI_ENOSUPP;
1764  }
1765 }
1766 
1767 
1768 /* Initialize a new control state */
1769 static int
1770 _pe_init_control_state( hwd_control_state_t *ctl )
1771 {
1772  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1773 
1774  /* clear the contents */
1775  memset( pe_ctl, 0, sizeof ( pe_control_t ) );
1776 
1777  /* Set the domain */
1778  _pe_set_domain( ctl, _perf_event_vector.cmp_info.default_domain );
1779 
1780  /* default granularity */
1781  pe_ctl->granularity= _perf_event_vector.cmp_info.default_granularity;
1782 
1783  /* overflow signal */
1784  pe_ctl->overflow_signal=_perf_event_vector.cmp_info.hardware_intr_sig;
1785 
1786  pe_ctl->cidx=our_cidx;
1787 
1788  /* Set cpu number in the control block to show events */
1789  /* are not tied to specific cpu */
1790  pe_ctl->cpu = -1;
1791 
1792  return PAPI_OK;
1793 }
1794 
1795 
1796 /****************** EVENT NAME HANDLING CODE *****************/
1797 
1798 static int
1799 _pe_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
1800 {
1801  return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier, our_cidx,
1802  &perf_native_event_table);
1803 }
1804 
1805 static int
1806 _pe_ntv_name_to_code( const char *name, unsigned int *event_code)
1807 {
1808  return _pe_libpfm4_ntv_name_to_code(name,event_code, our_cidx,
1809  &perf_native_event_table);
1810 }
1811 
1812 static int
1813 _pe_ntv_code_to_name(unsigned int EventCode,
1814  char *ntv_name, int len)
1815 {
1816  return _pe_libpfm4_ntv_code_to_name(EventCode,
1817  ntv_name, len,
1818  &perf_native_event_table);
1819 }
1820 
1821 static int
1822 _pe_ntv_code_to_descr( unsigned int EventCode,
1823  char *ntv_descr, int len)
1824 {
1825 
1826  return _pe_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len,
1827  &perf_native_event_table);
1828 }
1829 
1830 static int
1831 _pe_ntv_code_to_info(unsigned int EventCode,
1832  PAPI_event_info_t *info) {
1833 
1834  return _pe_libpfm4_ntv_code_to_info(EventCode, info,
1835  &perf_native_event_table);
1836 }
1837 
1838 
1839 /*********************** SAMPLING / PROFILING *******************/
1840 
1841 
1842 /* Find a native event specified by a profile index */
1843 static int
1844 find_profile_index( EventSetInfo_t *ESI, int evt_idx, int *flags,
1845  unsigned int *native_index, int *profile_index )
1846 {
1847  int pos, esi_index, count;
1848 
1849  for ( count = 0; count < ESI->profile.event_counter; count++ ) {
1850  esi_index = ESI->profile.EventIndex[count];
1851  pos = ESI->EventInfoArray[esi_index].pos[0];
1852 
1853  if ( pos == evt_idx ) {
1854  *profile_index = count;
1855  *native_index = ESI->NativeInfoArray[pos].ni_event &
1856  PAPI_NATIVE_AND_MASK;
1857  *flags = ESI->profile.flags;
1858  SUBDBG( "Native event %d is at profile index %d, flags %d\n",
1859  *native_index, *profile_index, *flags );
1860  return PAPI_OK;
1861  }
1862  }
1863  PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d",
1864  count, ESI->profile.event_counter );
1865  return PAPI_EBUG;
1866 }
1867 
1868 
1869 /* Process any samples remaining in this event's mmap ring buffer */
1870 static int
1871 process_smpl_buf( int evt_idx, ThreadInfo_t **thr, int cidx )
1872 {
1873  int ret, flags, profile_index;
1874  unsigned native_index;
1875  pe_control_t *ctl;
1876 
1877  ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx,
1878  &flags, &native_index, &profile_index );
1879  if ( ret != PAPI_OK ) {
1880  return ret;
1881  }
1882 
1883  ctl= (*thr)->running_eventset[cidx]->ctl_state;
1884 
1885  mmap_read( cidx, thr, &(ctl->events[evt_idx]), profile_index );
1886 
1887  return PAPI_OK;
1888 }
1889 
1890 /*
1891  * This function is used when hardware overflows are working or when
1892  * software overflows are forced
1893  */
1894 
1895 static void
1896 _pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc)
1897 {
1898  ( void ) n; /*unused */
1899  _papi_hwi_context_t hw_context;
1900  int found_evt_idx = -1, fd = info->si_fd;
1901  caddr_t address;
1902  ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 );
1903  int i;
1904  pe_control_t *ctl;
1905  int cidx = _perf_event_vector.cmp_info.CmpIdx;
1906 
1907  if ( thread == NULL ) {
1908  PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd );
1909  return;
1910  }
1911 
1912  if ( thread->running_eventset[cidx] == NULL ) {
1913  PAPIERROR( "thread->running_eventset == NULL in "
1914  "_papi_pe_dispatch_timer for fd %d!",fd );
1915  return;
1916  }
1917 
1918  if ( thread->running_eventset[cidx]->overflow.flags == 0 ) {
1919  PAPIERROR( "thread->running_eventset->overflow.flags == 0 in "
1920  "_papi_pe_dispatch_timer for fd %d!", fd );
1921  return;
1922  }
1923 
1924  hw_context.si = info;
1925  hw_context.ucontext = ( hwd_ucontext_t * ) uc;
1926 
1927  if ( thread->running_eventset[cidx]->overflow.flags &
1928  PAPI_OVERFLOW_FORCE_SW ) {
1929  address = GET_OVERFLOW_ADDRESS( hw_context );
1930  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
1931  address, NULL, 0,
1932  0, &thread, cidx );
1933  return;
1934  }
1935 
1936  if ( thread->running_eventset[cidx]->overflow.flags !=
1937  PAPI_OVERFLOW_HARDWARE ) {
1938  PAPIERROR( "thread->running_eventset->overflow.flags "
1939  "is set to something other than "
1940  "PAPI_OVERFLOW_HARDWARE or "
1941  "PAPI_OVERFLOW_FORCE_SW for fd %d (%#x)",
1942  fd,
1943  thread->running_eventset[cidx]->overflow.flags);
1944  }
1945 
1946  /* convoluted way to get ctl */
1947  ctl= thread->running_eventset[cidx]->ctl_state;
1948 
1949  /* See if the fd is one that's part of this thread's context */
1950  for( i=0; i < ctl->num_events; i++ ) {
1951  if ( fd == ctl->events[i].event_fd ) {
1952  found_evt_idx = i;
1953  break;
1954  }
1955  }
1956 
1957  if ( found_evt_idx == -1 ) {
1958  PAPIERROR( "Unable to find fd %d among the open event fds "
1959  "in _papi_hwi_dispatch_timer!", fd );
1960  return;
1961  }
1962 
1963  if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) {
1964  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed");
1965  }
1966 
1967  if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) &&
1968  !( thread->running_eventset[cidx]->profile.flags &
1969  PAPI_PROFIL_FORCE_SW ) ) {
1970  process_smpl_buf( found_evt_idx, &thread, cidx );
1971  }
1972  else {
1973  uint64_t ip;
1974  unsigned int head;
1975  pe_event_info_t *pe = &(ctl->events[found_evt_idx]);
1976  unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( );
1977 
1978  /*
1979  * Read up the most recent IP from the sample in the mmap buffer. To
1980  * do this, we make the assumption that all of the records in the
1981  * mmap buffer are the same size, and that they all contain the IP as
1982  * their only record element. This means that we can use the
1983  * data_head element from the user page and move backward one record
1984  * from that point and read the data. Since we don't actually need
1985  * to access the header of the record, we can just subtract 8 (size
1986  * of the IP) from data_head and read up that word from the mmap
1987  * buffer. After we subtract 8, we account for mmap buffer wrapping
1988  * by AND'ing this offset with the buffer mask.
1989  */
1990  head = mmap_read_head( pe );
1991 
1992  if ( head == 0 ) {
1993  PAPIERROR( "Attempting to access memory "
1994  "which may be inaccessible" );
1995  return;
1996  }
1997  ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) );
1998  /*
1999  * Update the tail to the current head pointer.
2000  *
2001  * Note that if we were to read the record at the tail pointer,
2002  * rather than the one at the head (as you might otherwise think
2003  * would be natural), we could run into problems. Signals don't
2004  * stack well on Linux, particularly if not using RT signals, and if
2005  * they come in rapidly enough, we can lose some. Over time, the head
2006  * could catch up to the tail and monitoring would be stopped, and
2007  * since no more signals are coming in, this problem will never be
2008  * resolved, resulting in a complete loss of overflow notification
2009  * from that point on. So the solution we use here will result in
2010  * only the most recent IP value being read every time there are two
2011  * or more samples in the buffer (for that one overflow signal). But
2012  * the handler will always bring up the tail, so the head should
2013  * never run into the tail.
2014  */
2015  mmap_write_tail( pe, head );
2016 
2017  /*
2018  * The fourth parameter is supposed to be a vector of bits indicating
2019  * the overflowed hardware counters, but it's not really clear that
2020  * it's useful, because the actual hardware counters used are not
2021  * exposed to the PAPI user. For now, I'm just going to set the bit
2022  * that indicates which event register in the array overflowed. The
2023  * result is that the overflow vector will not be identical to the
2024  * perfmon implementation, and part of that is due to the fact that
2025  * which hardware register is actually being used is opaque at the
2026  * user level (the kernel event dispatcher hides that info).
2027  */
2028 
2029  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
2030  ( caddr_t ) ( unsigned long ) ip,
2031  NULL, ( 1 << found_evt_idx ), 0,
2032  &thread, cidx );
2033 
2034  }
2035 
2036  /* Restart the counters */
2037  if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) {
2038  PAPIERROR( "overflow refresh failed" );
2039  }
2040 }
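
/* A self-contained sketch (not part of perf_event.c) of the
 * wraparound arithmetic the handler above relies on: with a
 * power-of-two number of sample pages, mask is the data-area size
 * minus one, and the 8 bytes just below data_head hold the
 * PERF_SAMPLE_IP value of the newest record. */

#include <stdint.h>
#include <string.h>

/* data points one page past the mmap header, at the sample pages */
static uint64_t
most_recent_ip( const unsigned char *data, uint64_t data_head,
		uint64_t mask )
{
	uint64_t ip;

	/* step back by the size of the IP, then AND with the mask */
	/* to handle ring-buffer wraparound */
	memcpy( &ip, data + ( ( data_head - 8 ) & mask ), sizeof ( ip ) );
	return ip;
}
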
2041 
2042 /* Stop profiling */
2043 /* FIXME: does this actually stop anything? */
2044 /* It looks like it is only actually called from PAPI_stop() */
2045 /* So the event will be destroyed soon after anyway. */
2046 static int
2047 _pe_stop_profiling( ThreadInfo_t *thread, EventSetInfo_t *ESI )
2048 {
2049  int i, ret = PAPI_OK;
2050  pe_control_t *ctl;
2051  int cidx;
2052 
2053  ctl=ESI->ctl_state;
2054 
2055  cidx=ctl->cidx;
2056 
2057  /* Loop through all of the events and process those which have mmap */
2058  /* buffers attached. */
2059  for ( i = 0; i < ctl->num_events; i++ ) {
2060  /* Use the profiling field as an indicator */
2061  /* of this fd being used for profiling. */
2062  if ( ctl->events[i].profiling ) {
2063  /* Process any remaining samples in the sample buffer */
2064  ret = process_smpl_buf( i, &thread, cidx );
2065  if ( ret ) {
2066  PAPIERROR( "process_smpl_buf returned error %d", ret );
2067  return ret;
2068  }
2069  ctl->events[i].profiling=0;
2070  }
2071  }
2072 
2073  return ret;
2074 }
2075 
2076 /* Set up an event to cause overflow */
2077 /* If threshold==0 then disable overflow for that event */
2078 static int
2079 _pe_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
2080 {
2081  SUBDBG("ENTER: ESI: %p, EventIndex: %d, threshold: %d\n",
2082  ESI, EventIndex, threshold);
2083 
2084  pe_context_t *ctx;
2085  pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state );
2086  int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK;
2087  int cidx;
2088 
2089  cidx = ctl->cidx;
2090  ctx = ( pe_context_t *) ( ESI->master->context[cidx] );
2091 
2092  /* pos[0] is the first native event */
2093  /* derived events might be made up of multiple native events */
2094  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2095 
2096  SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n",
2097  evt_idx,EventIndex,ESI->EventSetIndex);
2098 
2099  if (evt_idx<0) {
2100  SUBDBG("EXIT: evt_idx: %d\n", evt_idx);
2101  return PAPI_EINVAL;
2102  }
2103 
2104  /* It's an error to disable overflow if it wasn't set in the */
2105  /* first place. */
2106  if (( threshold == 0 ) &&
2107  ( ctl->events[evt_idx].attr.sample_period == 0 ) ) {
2108  SUBDBG("EXIT: PAPI_EINVAL, Tried to clear "
2109  "sample threshold when it was not set\n");
2110  return PAPI_EINVAL;
2111  }
2112 
2113  /* Set the sample period to threshold */
2114  ctl->events[evt_idx].attr.sample_period = threshold;
2115 
2116  if (threshold == 0) {
2117  ctl->events[evt_idx].sampling = 0;
2118  }
2119  else {
2120  ctl->events[evt_idx].sampling = 1;
2121 
2122  /* Setting wakeup_events to one means issue a wakeup on every */
2123  /* counter overflow (not mmap page overflow). */
2124  ctl->events[evt_idx].attr.wakeup_events = 1;
2125  /* We need the IP to pass to the overflow handler */
2126  ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP;
2127  }
2128 
2129 
2130  /* Check to see if any events in the EventSet are setup to sample */
2131  /* Do we actually handle multiple overflow events at once? --vmw */
2132  for ( i = 0; i < ctl->num_events; i++ ) {
2133  if ( ctl->events[i].attr.sample_period ) {
2134  found_non_zero_sample_period = 1;
2135  break;
2136  }
2137  }
2138 
2139  if ( found_non_zero_sample_period ) {
2140  /* turn on internal overflow flag for this event set */
2141  ctl->overflow = 1;
2142 
2143  /* Enable the signal handler */
2144  retval = _papi_hwi_start_signal(
2145  ctl->overflow_signal,
2146  1, ctl->cidx );
2147  if (retval != PAPI_OK) {
2148  SUBDBG("Call to _papi_hwi_start_signal "
2149  "returned: %d\n", retval);
2150  }
2151  } else {
2152 
2153  /* turn off internal overflow flag for this event set */
2154  ctl->overflow = 0;
2155 
2156  /* Remove the signal handler, if there are no remaining */
2157  /* non-zero sample_periods set */
2158  retval = _papi_hwi_stop_signal( ctl->overflow_signal );
2159  if ( retval != PAPI_OK ) {
2160  SUBDBG("Call to _papi_hwi_stop_signal "
2161  "returned: %d\n", retval);
2162  return retval;
2163  }
2164  }
2165 
2166  retval = _pe_update_control_state( ctl, NULL,
2167  ((pe_control_t *)(ESI->ctl_state) )->num_events,
2168  ctx );
2169 
2170  SUBDBG("EXIT: return: %d\n", retval);
2171 
2172  return retval;
2173 }
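
/* A minimal end-to-end sketch (not part of perf_event.c) of driving
 * the code above: PAPI_overflow() with a non-zero threshold lands in
 * _pe_set_overflow(), which programs attr.sample_period and installs
 * the signal handler serviced by _pe_dispatch_timer(). The threshold
 * of one million instructions is arbitrary, and the PAPI_TOT_INS
 * preset is assumed to be available. */

#include <stdio.h>
#include "papi.h"

static volatile long long overflows;

static void
handler( int EventSet, void *address, long long overflow_vector,
	 void *context )
{
	(void) EventSet; (void) address;
	(void) overflow_vector; (void) context;
	overflows++;
}

int
main( void )
{
	int i, EventSet = PAPI_NULL;
	long long count;
	volatile double s = 0;

	if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT )
		return 1;
	PAPI_create_eventset( &EventSet );
	PAPI_add_event( EventSet, PAPI_TOT_INS );
	PAPI_overflow( EventSet, PAPI_TOT_INS, 1000000, 0, handler );

	PAPI_start( EventSet );
	for ( i = 0; i < 50000000; i++ ) s += i;
	PAPI_stop( EventSet, &count );

	printf( "%lld instructions, %lld overflow signals\n",
		count, overflows );
	return 0;
}
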
2174 
2175 /* Enable/disable profiling */
2176 /* If threshold is zero, we disable */
2177 static int
2178 _pe_set_profile( EventSetInfo_t *ESI, int EventIndex, int threshold )
2179 {
2180  int ret;
2181  int evt_idx;
2182  pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state );
2183 
2184  /* Since you can't profile on a derived event, */
2185  /* the event is always the first and only event */
2186  /* in the native event list. */
2187  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2188 
2189  /* If threshold is zero we want to *disable* */
2190  /* profiling on the event */
2191  if ( threshold == 0 ) {
2192 // SUBDBG( "MUNMAP(%p,%"PRIu64")\n",
2193 // ctl->events[evt_idx].mmap_buf,
2194 // ( uint64_t ) ctl->events[evt_idx].nr_mmap_pages *
2195 // getpagesize() );
2196 
2197 // if ( ctl->events[evt_idx].mmap_buf ) {
2198 // munmap( ctl->events[evt_idx].mmap_buf,
2199 // ctl->events[evt_idx].nr_mmap_pages *
2200 // getpagesize() );
2201 // }
2202 // ctl->events[evt_idx].mmap_buf = NULL;
2203 // ctl->events[evt_idx].nr_mmap_pages = 0;
2204 
2205  /* no longer sample on IP */
2206  ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP;
2207 
2208  /* Clear any residual overflow flags */
2209  /* ??? old warning says "This should be handled somewhere else" */
2210  ESI->state &= ~( PAPI_OVERFLOWING );
2211  ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );
2212 
2213  ctl->events[evt_idx].profiling=0;
2214 
2215  } else {
2216 
2217  /* Otherwise, we are *enabling* profiling */
2218 
2219  /* Look up the native event code */
2220 
2221  if ( ESI->profile.flags & (PAPI_PROFIL_DATA_EAR |
2222  PAPI_PROFIL_INST_EAR)) {
2223  /* Not supported yet... */
2224  return PAPI_ENOSUPP;
2225  }
2226 
2227  if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
2228  /* This requires an ability to randomly alter the */
2229  /* sample_period within a given range. */
2230  /* Linux currently does not have this ability. FIXME */
2231  return PAPI_ENOSUPP;
2232  }
2233  ctl->events[evt_idx].profiling=1;
2234  }
2235 
2236  ret = _pe_set_overflow( ESI, EventIndex, threshold );
2237  if ( ret != PAPI_OK ) return ret;
2238 
2239  return PAPI_OK;
2240 }
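
/* A minimal profiling sketch (not part of perf_event.c) reaching
 * _pe_set_profile() above. The bucket count and threshold are
 * arbitrary; scale 65536 is the conventional full-resolution mapping
 * (see profil(2)); and casting the function pointer for the offset is
 * the usual profil-style idiom, though not strictly portable. */

#include <stdio.h>
#include <sys/types.h>
#include "papi.h"

#define NBUCKETS 16384

static volatile double sink;

static void
work( void )
{
	int i;
	for ( i = 0; i < 10000000; i++ ) sink += i * 0.5;
}

int
main( void )
{
	int EventSet = PAPI_NULL;
	long long count;
	static unsigned short profbuf[NBUCKETS];

	if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT )
		return 1;
	PAPI_create_eventset( &EventSet );
	PAPI_add_event( EventSet, PAPI_TOT_CYC );

	/* histogram sampled IPs starting at work() into profbuf */
	PAPI_profil( profbuf, sizeof ( profbuf ), ( caddr_t ) work, 65536,
			EventSet, PAPI_TOT_CYC, 1000000, PAPI_PROFIL_POSIX );

	PAPI_start( EventSet );
	work();
	PAPI_stop( EventSet, &count );
	printf( "%lld cycles\n", count );
	return 0;
}
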
2241 
2242 
2243 /************ INITIALIZATION / SHUTDOWN CODE *********************/
2244 
2245 
2246 /* Shutdown the perf_event component */
2247 static int
2248 _pe_shutdown_component( void ) {
2249 
2250  /* deallocate our event table */
2251  _pe_libpfm4_shutdown(&_perf_event_vector, &perf_native_event_table);
2252 
2253  /* Shutdown libpfm4 */
2254  _papi_libpfm4_shutdown(&_perf_event_vector);
2255 
2256  return PAPI_OK;
2257 }
2258 
2259 
2260 /* Check the mmap page for rdpmc support */
2261 static int _pe_detect_rdpmc(void) {
2262 
2263  struct perf_event_attr pe;
2264  int fd,rdpmc_exists=1;
2265  void *addr;
2266  struct perf_event_mmap_page *our_mmap;
2267  int page_size=getpagesize();
2268 
2269 #if defined(__i386__) || defined (__x86_64__)
2270 #else
2271  /* We only support rdpmc on x86 for now */
2272  return 0;
2273 #endif
2274 
2275  /* There were various subtle bugs in rdpmc support before */
2276  /* the Linux 4.13 release. */
2277  if (_papi_os_info.os_version < LINUX_VERSION(4,13,0)) {
2278  return 0;
2279  }
2280 
2281  /* Create a fake instructions event so we can read a mmap page */
2282  memset(&pe,0,sizeof(struct perf_event_attr));
2283 
2284  pe.type=PERF_TYPE_HARDWARE;
2285  pe.size=sizeof(struct perf_event_attr);
2286  pe.config=PERF_COUNT_HW_INSTRUCTIONS;
2287  pe.exclude_kernel=1;
2288  pe.disabled=1;
2289 
2290  perf_event_dump_attr(&pe,0,-1,-1,0);
2291  fd=sys_perf_event_open(&pe,0,-1,-1,0);
2292 
2293  /* This hopefully won't happen? */
2294  /* Though there is a chance this is the first */
2295  /* attempt to open a perf_event */
2296  if (fd<0) {
2297  SUBDBG("FAILED perf_event_open trying to detect rdpmc support");
2298  return PAPI_ESYS;
2299  }
2300 
2301  /* create the mmap page */
2302  addr=mmap(NULL, page_size, PROT_READ, MAP_SHARED,fd,0);
2303  if (addr == MAP_FAILED) {
2304  SUBDBG("FAILED mmap trying to detect rdpmc support");
2305  close(fd);
2306  return PAPI_ESYS;
2307  }
2308 
2309  /* get the rdpmc info from the mmap page */
2310  our_mmap=(struct perf_event_mmap_page *)addr;
2311 
2312  /* If cap_usr_rdpmc bit is set to 1, we have support! */
2313  if (our_mmap->cap_usr_rdpmc!=0) {
2314  rdpmc_exists=1;
2315  }
2316  else if ((!our_mmap->cap_bit0_is_deprecated) && (our_mmap->cap_bit0)) {
2317  /* 3.4 to 3.11 had somewhat broken rdpmc support */
2318  /* This convoluted test is the "official" way to detect this */
2319  /* To make things easier we don't support these kernels */
2320  rdpmc_exists=0;
2321  }
2322  else {
2323  rdpmc_exists=0;
2324  }
2325 
2326  /* close the fake event */
2327  munmap(addr,page_size);
2328  close(fd);
2329 
2330  return rdpmc_exists;
2331 
2332 }
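
/* For reference, sys_perf_event_open() (see perf_helpers.h) boils
 * down to the raw syscall, since glibc ships no wrapper for
 * perf_event_open(2). A standalone sketch; the name
 * my_perf_event_open is ours, not PAPI's. */

#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long
my_perf_event_open( struct perf_event_attr *attr, pid_t pid, int cpu,
		int group_fd, unsigned long flags )
{
	return syscall( __NR_perf_event_open, attr, pid, cpu,
			group_fd, flags );
}
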
2333 
2334 
2335 static int
2336 _pe_handle_paranoid(papi_vector_t *component) {
2337 
2338  FILE *fff;
2339  int paranoid_level;
2340  int retval;
2341 
2342  /* This is the official way to detect if perf_event support exists */
2343  /* The file is called perf_counter_paranoid on 2.6.31 */
2344  /* currently we are lazy and do not support 2.6.31 kernels */
2345 
2346  fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
2347  if (fff==NULL) {
2348  strncpy(component->cmp_info.disabled_reason,
2349  "perf_event support not detected",PAPI_MAX_STR_LEN);
2350  return PAPI_ENOCMP;
2351  }
2352 
2353  /* 3 (vendor patch) means completely disabled */
2354  /* 2 means no kernel measurements allowed */
2355  /* 1 means normal counter access */
2356  /* 0 means you can access CPU-specific data */
2357  /* -1 means no restrictions */
2358  retval=fscanf(fff,"%d",&paranoid_level);
2359  if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
2360  fclose(fff);
2361 
2362  if (paranoid_level==3) {
2363  strncpy(component->cmp_info.disabled_reason,
2364  "perf_event support disabled by Linux with paranoid=3",PAPI_MAX_STR_LEN);
2365  return PAPI_ENOCMP;
2366  }
2367 
2368  if ((paranoid_level==2) && (getuid()!=0)) {
2369  SUBDBG("/proc/sys/kernel/perf_event_paranoid prohibits kernel counts");
2370  component->cmp_info.available_domains &=~PAPI_DOM_KERNEL;
2371  }
2372 
2373  return PAPI_OK;
2374 
2375 }
2376 
2377 #if (OBSOLETE_WORKAROUNDS==1)
2378 /* Version based workarounds */
2379 /* perf_event has many bugs */
2380 /* PAPI has to work around a number of them, but for the most part */
2381 /* all of those were fixed by Linux 2.6.34 (May 2010) */
2382 /* Unfortunately it's not easy to auto-detect for these so we were */
2383 /* going by uname() version number */
2384 /* To complicate things, some vendors like Redhat backport fixes */
2385 /* So even though their kernel reports as 2.6.32 it has the fixes */
2386 /* As of PAPI 5.6 we're going to default to disabling the workarounds */
2387 /* I'm going to leave them here, ifdefed out, for the time being */
2388 static int
2389 _pe_version_workarounds(papi_vector_t *component) {
2390 
2391  /* Kernel multiplexing is broken prior to kernel 2.6.34 */
2392  /* The fix was probably git commit: */
2393  /* 45e16a6834b6af098702e5ea6c9a40de42ff77d8 */
2394  if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
2395  component->cmp_info.kernel_multiplex = 0;
2396  component->cmp_info.num_mpx_cntrs = PAPI_MAX_SW_MPX_EVENTS;
2397  }
2398 
2399  /* Check that processor is supported */
2400  if (processor_supported(_papi_hwi_system_info.hw_info.vendor,
2401  _papi_hwi_system_info.hw_info.cpuid_family)!=PAPI_OK) {
2402  fprintf(stderr,"warning, your processor is unsupported\n");
2403  /* should not return error, as software events should still work */
2404  }
2405 
2406  /* Update the default function pointers */
2407  /* Based on features/bugs */
2408  if (bug_sync_read()) {
2409  component->read = _pe_read_bug_sync;
2410  }
2411 
2412  return PAPI_OK;
2413 
2414 }
2415 
2416 #endif
2417 
2418 
2419 
2420 
2421 /* Initialize the perf_event component */
2422 static int
2423 _pe_init_component( int cidx )
2424 {
2425 
2426  int retval;
2427 
2428  our_cidx=cidx;
2429 
2430  /* Update component behavior based on paranoid setting */
2431  retval=_pe_handle_paranoid(_papi_hwd[cidx]);
2432  if (retval!=PAPI_OK) return retval;
2433 
2434 #if (OBSOLETE_WORKAROUNDS==1)
2435  /* Handle any kernel version related workarounds */
2436  _pe_version_workarounds(_papi_hwd[cidx]);
2437 #endif
2438 
2439  /* Setup mmtimers, if appropriate */
2440  retval=mmtimer_setup();
2441  if (retval) {
2442  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2443  "Error initializing mmtimer",PAPI_MAX_STR_LEN);
2444  return retval;
2445  }
2446 
2447  /* Set the overflow signal */
2448  _papi_hwd[cidx]->cmp_info.hardware_intr_sig = SIGRTMIN + 2;
2449 
2450  /* Run Vendor-specific fixups */
2451  pe_vendor_fixups(_papi_hwd[cidx]);
2452 
2453  /* Detect if we can use rdpmc (or equivalent) */
2454  retval=_pe_detect_rdpmc();
2455  _papi_hwd[cidx]->cmp_info.fast_counter_read = retval;
2456  if (retval < 0 ) {
2457  /* Don't actually fail here, as this could be a survivable bug */
2458  /* If perf_event_open/mmap truly are failing we will */
2459  /* likely catch it pretty quickly elsewhere. */
2460  _papi_hwd[cidx]->cmp_info.fast_counter_read = 0;
2461  }
2462 
2463 #if (USE_PERFEVENT_RDPMC==1)
2464 
2465 #else
2466  /* Force fast_counter_read off if --enable-perfevent-rdpmc=no */
2467  _papi_hwd[cidx]->cmp_info.fast_counter_read = 0;
2468 #endif
2469 
2470  /* Run the libpfm4-specific setup */
2471  retval = _papi_libpfm4_init(_papi_hwd[cidx]);
2472  if (retval) {
2473 
2474  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2475  "Error initializing libpfm4",PAPI_MAX_STR_LEN);
2476  return retval;
2477 
2478  }
2479 
2480  /* Now that libpfm4 is initialized */
2481  /* Try to setup the perf_event component events */
2482 
2483  retval = _pe_libpfm4_init(_papi_hwd[cidx], cidx,
2484  &perf_native_event_table,
2485  PMU_TYPE_CORE | PMU_TYPE_OS);
2486  if (retval) {
2487  switch(retval) {
2488  case PAPI_ENOMEM:
2489  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2490  "Error libpfm4 memory allocation",
2491  PAPI_MAX_STR_LEN);
2492  break;
2493  case PAPI_ENOSUPP:
2494  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2495  "Error libpfm4 no PMUs found",
2496  PAPI_MAX_STR_LEN);
2497  break;
2498  case PAPI_ENOCMP:
2499  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2500  "Error libpfm4 no default PMU found",
2501  PAPI_MAX_STR_LEN);
2502  break;
2503  case PAPI_ECOUNT:
2504  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2505  "Error libpfm4 too many default PMUs found",
2506  PAPI_MAX_STR_LEN);
2507  break;
2508  case PAPI_ENOEVNT:
2509  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2510  "Error loading preset events",
2511  PAPI_MAX_STR_LEN);
2512  break;
2513  default:
2514  printf("PAPI error %d\n",retval);
2515  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2516  "Unknown libpfm4 related error",
2517  PAPI_MAX_STR_LEN);
2518 
2519  }
2520  return retval;
2521  }
2522 
2523  /* Detect NMI watchdog which can steal counters */
2524  /* FIXME: on Intel we should also halve the count if SMT enabled */
2525  if (_linux_detect_nmi_watchdog()) {
2526  if (_papi_hwd[cidx]->cmp_info.num_cntrs>0) {
2527  _papi_hwd[cidx]->cmp_info.num_cntrs--;
2528  }
2529  SUBDBG("The Linux nmi_watchdog is using one of the performance "
2530  "counters, reducing the total number available.\n");
2531  }
2532 
2533  /* check for exclude_guest issue */
2534  check_exclude_guest();
2535 
2536  return PAPI_OK;
2537 
2538 }
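
/* A minimal sketch (not part of perf_event.c) of checking from user
 * code whether this component came up, or was disabled with one of
 * the disabled_reason strings set above. Assumes PAPI_library_init()
 * has already run. */

#include <stdio.h>
#include <string.h>
#include "papi.h"

static void
report_perf_event_component( void )
{
	int cidx;
	const PAPI_component_info_t *cmpinfo;

	for ( cidx = 0; cidx < PAPI_num_components(); cidx++ ) {
		cmpinfo = PAPI_get_component_info( cidx );
		if ( ( cmpinfo == NULL ) ||
		     ( strcmp( cmpinfo->name, "perf_event" ) != 0 ) )
			continue;
		if ( cmpinfo->disabled )
			printf( "perf_event disabled: %s\n",
				cmpinfo->disabled_reason );
		else
			printf( "perf_event OK: %d counters\n",
				cmpinfo->num_cntrs );
	}
}
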
2539 
2540 
2541 
2542 /* Our component vector */
2543 
2544 papi_vector_t _perf_event_vector = {
2545  .cmp_info = {
2546  /* component information (unspecified values initialized to 0) */
2547  .name = "perf_event",
2548  .short_name = "perf",
2549  .version = "5.0",
2550  .description = "Linux perf_event CPU counters",
2551 
2552  .default_domain = PAPI_DOM_USER,
2553  .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
2554  .default_granularity = PAPI_GRN_THR,
2555  .available_granularities = PAPI_GRN_THR | PAPI_GRN_SYS,
2556 
2557  .hardware_intr = 1,
2558  .kernel_profile = 1,
2559 
2560  /* component specific cmp_info initializations */
2561  .fast_virtual_timer = 0,
2562  .attach = 1,
2563  .attach_must_ptrace = 1,
2564  .cpu = 1,
2565  .inherit = 1,
2566  .cntr_umasks = 1,
2567 
2568  .kernel_multiplex = 1,
2569  .num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS,
2570 
2571 
2572  },
2573 
2574  /* sizes of framework-opaque component-private structures */
2575  .size = {
2576  .context = sizeof ( pe_context_t ),
2577  .control_state = sizeof ( pe_control_t ),
2578  .reg_value = sizeof ( int ),
2579  .reg_alloc = sizeof ( int ),
2580  },
2581 
2582  /* function pointers in this component */
2583  .init_component = _pe_init_component,
2584  .shutdown_component = _pe_shutdown_component,
2585  .init_thread = _pe_init_thread,
2586  .init_control_state = _pe_init_control_state,
2587  .dispatch_timer = _pe_dispatch_timer,
2588 
2589  /* function pointers from the shared perf_event lib */
2590  .start = _pe_start,
2591  .stop = _pe_stop,
2592  .read = _pe_read,
2593  .shutdown_thread = _pe_shutdown_thread,
2594  .ctl = _pe_ctl,
2595  .update_control_state = _pe_update_control_state,
2596  .set_domain = _pe_set_domain,
2597  .reset = _pe_reset,
2598  .set_overflow = _pe_set_overflow,
2599  .set_profile = _pe_set_profile,
2600  .stop_profiling = _pe_stop_profiling,
2601  .write = _pe_write,
2602 
2603 
2604  /* from counter name mapper */
2605  .ntv_enum_events = _pe_ntv_enum_events,
2606  .ntv_name_to_code = _pe_ntv_name_to_code,
2607  .ntv_code_to_name = _pe_ntv_code_to_name,
2608  .ntv_code_to_descr = _pe_ntv_code_to_descr,
2609  .ntv_code_to_info = _pe_ntv_code_to_info,
2610 };