PAPI 5.4.1.0
perf_event.c
1 /*
2 * File: perf_event.c
3 *
4 * Author: Corey Ashford
5 * cjashfor@us.ibm.com
6 * - based upon perfmon.c written by -
7 * Philip Mucci
8 * mucci@cs.utk.edu
9 * Mods: Gary Mohr
10 * gary.mohr@bull.com
11 * Mods: Vince Weaver
12 * vweaver1@eecs.utk.edu
13 * Mods: Philip Mucci
14 * mucci@eecs.utk.edu
15 * Mods: Gary Mohr
16 * gary.mohr@bull.com
17 * Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4.
18 * This adds several new event masks, including cpu=, u=, and k= which give the user
19 * the ability to set cpu number to use or control the domain (user, kernel, or both)
20 * in which the counter should be incremented. These are event masks so it is now
21 * possible to have multiple events in the same event set that count activity from
22 * different CPUs or count activity in different domains.
23 */
24 
25 
26 #include <fcntl.h>
27 #include <string.h>
28 #include <errno.h>
29 #include <signal.h>
30 #include <syscall.h>
31 #include <sys/utsname.h>
32 #include <sys/mman.h>
33 #include <sys/ioctl.h>
34 
35 /* PAPI-specific includes */
36 #include "papi.h"
37 #include "papi_memory.h"
38 #include "papi_internal.h"
39 #include "papi_vector.h"
40 #include "extras.h"
41 
42 /* libpfm4 includes */
43 #include "papi_libpfm4_events.h"
44 #include "pe_libpfm4_events.h"
45 #include "perfmon/pfmlib.h"
46 #include PEINCLUDE
47 
48 /* Linux-specific includes */
49 #include "mb.h"
50 #include "linux-memory.h"
51 #include "linux-timer.h"
52 #include "linux-common.h"
53 #include "linux-context.h"
54 
55 #include "perf_event_lib.h"
56 
57 /* Defines for ctx->state */
58 #define PERF_EVENTS_OPENED 0x01
59 #define PERF_EVENTS_RUNNING 0x02
60 
61 /* Static globals */
62 static int nmi_watchdog_active;
63 
64 /* Forward declaration */
65 papi_vector_t _perf_event_vector;
66 
67 /* Globals */
68 struct native_event_table_t perf_native_event_table;
69 static int our_cidx;
70 int
72  return our_cidx;
73 }
74 
75 /* These sentinels tell _pe_set_overflow() how to set the */
76 /* wakeup_events field in the event descriptor record. */
77 
78 #define WAKEUP_COUNTER_OVERFLOW 0
79 #define WAKEUP_PROFILING -1
80 
81 #define WAKEUP_MODE_COUNTER_OVERFLOW 0
82 #define WAKEUP_MODE_PROFILING 1
83 
84 /* The kernel developers say to never use a refresh value of 0 */
85 /* See https://lkml.org/lkml/2011/5/24/172 */
86 /* However, on some platforms (like Power) a value of 1 does not work */
87 /* We're still tracking down why this happens. */
88 
89 #if defined(__powerpc__)
90 #define PAPI_REFRESH_VALUE 0
91 #else
92 #define PAPI_REFRESH_VALUE 1
93 #endif
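/* Illustrative note: this value is what later gets passed to the */
/* PERF_EVENT_IOC_REFRESH ioctl when an overflow signal is re-armed, */
/* roughly: */
/* */
/*   ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ); */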
94 
95 static int _pe_set_domain( hwd_control_state_t *ctl, int domain);
96 
97 /* Check for processor support */
98 /* Can be used for generic checking, though in general we only */
99 /* check for pentium4 here because support was broken for multiple */
100 /* kernel releases and the usual standard detections did not */
101 /* handle this. So we check for pentium 4 explicitly. */
102 static int
103 processor_supported(int vendor, int family) {
104 
105  /* Error out if kernel too early to support p4 */
106  if (( vendor == PAPI_VENDOR_INTEL ) && (family == 15)) {
107  if (_papi_os_info.os_version < LINUX_VERSION(2,6,35)) {
108  PAPIERROR("Pentium 4 not supported on kernels before 2.6.35");
109  return PAPI_ENOSUPP;
110  }
111  }
112  return PAPI_OK;
113 }
114 
115 /* Fix up the config based on what CPU/Vendor we are running on */
116 static int
117 pe_vendor_fixups( papi_vector_t *vector )
118 {
119  /* powerpc */
120  /* On IBM and Power6 Machines default domain should include supervisor */
121  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) {
122  vector->cmp_info.available_domains |=
123  PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
124  if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) {
125  vector->cmp_info.default_domain =
126  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
127  }
128  }
129 
130  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_MIPS ) {
131  vector->cmp_info.available_domains |= PAPI_DOM_KERNEL;
132  }
133 
134  if ((_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL) ||
135  (_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD)) {
136  vector->cmp_info.fast_real_timer = 1;
137  }
138  /* ARM */
139  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) {
140  /* FIXME: this will change with Cortex A15 */
141  vector->cmp_info.available_domains |=
142  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
143  vector->cmp_info.default_domain =
144  PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
145  }
146 
147  /* CRAY */
148  if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) {
149  vector->cmp_info.available_domains |= PAPI_DOM_OTHER;
150  }
151 
152  return PAPI_OK;
153 }
154 
155 
156 
157 /******************************************************************/
158 /******** Kernel Version Dependent Routines **********************/
159 /******************************************************************/
160 
161 /* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch
162  * implementations (e.g. x86) which don't do a static event schedulability
163  * check in sys_perf_event_open.
164  * This was fixed for x86 in the 2.6.33 kernel
165  *
166  * Also! Kernels newer than 2.6.34 will fail in a similar way
167  * if the nmi_watchdog has stolen a performance counter
168  * and we try to use the maximum number of counters.
169  * A sys_perf_event_open() will seem to succeed but will fail
170  * at read time. So re-use this work around code.
171  */
172 static int
173 bug_check_scheduability(void) {
174 
175 #if defined(__powerpc__)
176  /* PowerPC not affected by this bug */
177 #elif defined(__mips__)
178  /* MIPS as of kernel 3.1 does not properly detect schedulability */
179  return 1;
180 #else
181  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
182 #endif
183 
184  if (nmi_watchdog_active) return 1;
185 
186  return 0;
187 }
188 
189 /* PERF_FORMAT_GROUP allows reading an entire group's counts at once */
190 /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */
191 /* from attached processes. We are lazy and disable it for all cases */
192 /* commit was: 050735b08ca8a016bbace4445fa025b88fee770b */
193 
194 static int
195 bug_format_group(void) {
196 
197  if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) return 1;
198 
199  /* MIPS, as of version 3.1, does not support this properly */
200 
201 #if defined(__mips__)
202  return 1;
203 #endif
204 
205  return 0;
206 
207 }
208 
209 
210 /* There's a bug prior to Linux 2.6.33 where if you are using */
211 /* PERF_FORMAT_GROUP, the TOTAL_TIME_ENABLED and */
212 /* TOTAL_TIME_RUNNING fields will be zero unless you disable */
213 /* the counters first */
214 static int
215 bug_sync_read(void) {
216 
217  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
218 
219  return 0;
220 
221 }
222 
223 
224 /* Set the F_SETOWN_EX flag on the fd. */
225 /* This affects which thread an overflow signal gets sent to */
226 /* Handled in a subroutine to handle the fact that the behavior */
227 /* is dependent on kernel version. */
228 static int
229 fcntl_setown_fd( int fd ) {
230 
231  int ret;
232  struct f_owner_ex fown_ex;
233 
234  /* F_SETOWN_EX is not available until 2.6.32 */
235  if (_papi_os_info.os_version < LINUX_VERSION(2,6,32)) {
236 
237  /* get ownership of the descriptor */
238  ret = fcntl( fd, F_SETOWN, mygettid( ) );
239  if ( ret == -1 ) {
240  PAPIERROR( "cannot fcntl(F_SETOWN) on %d: %s", fd, strerror(errno) );
241  return PAPI_ESYS;
242  }
243  }
244  else {
245  /* set ownership of the descriptor */
246  fown_ex.type = F_OWNER_TID;
247  fown_ex.pid = mygettid();
248  ret = fcntl(fd, F_SETOWN_EX, (unsigned long)&fown_ex );
249 
250  if ( ret == -1 ) {
251  PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s",
252  fd, strerror( errno ) );
253  return PAPI_ESYS;
254  }
255  }
256  return PAPI_OK;
257 }
258 
259 /* The read format on perf_event varies based on various flags that */
260 /* are passed into it. This helper avoids copying this logic */
261 /* multiple places. */
262 static unsigned int
263 get_read_format( unsigned int multiplex,
264  unsigned int inherit,
265  int format_group )
266 {
267  unsigned int format = 0;
268 
269  /* if we need read format options for multiplexing, add them now */
270  if (multiplex) {
271  format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
272  format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
273  }
274 
275  /* if our kernel supports it and we are not using inherit, */
276  /* add the group read options */
277  if ( (!bug_format_group()) && !inherit) {
278  if (format_group) {
279  format |= PERF_FORMAT_GROUP;
280  }
281  }
282 
283  SUBDBG("multiplex: %d, inherit: %d, format_group: %d, format: %#x\n",
284  multiplex, inherit, format_group, format);
285 
286  return format;
287 }
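/* For reference, a sketch of the combinations the helper above */
/* produces, assuming a kernel new enough that bug_format_group() */
/* returns 0: */
/* */
/*   get_read_format( 0, 0, 1 )  ->  PERF_FORMAT_GROUP */
/*   get_read_format( 1, 0, 0 )  ->  PERF_FORMAT_TOTAL_TIME_ENABLED | */
/*                                   PERF_FORMAT_TOTAL_TIME_RUNNING */
/*   get_read_format( 0, 1, 1 )  ->  0  (inherit suppresses group reads) */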
288 
289 /*****************************************************************/
290 /********* End Kernel-version Dependent Routines ****************/
291 /*****************************************************************/
292 
293 /*****************************************************************/
294 /********* Begin perf_event low-level code ***********************/
295 /*****************************************************************/
296 
297 /* In case headers aren't new enough to have __NR_perf_event_open */
298 #ifndef __NR_perf_event_open
299 
300 #ifdef __powerpc__
301 #define __NR_perf_event_open 319
302 #elif defined(__x86_64__)
303 #define __NR_perf_event_open 298
304 #elif defined(__i386__)
305 #define __NR_perf_event_open 336
306 #elif defined(__arm__)
307 #define __NR_perf_event_open 364+0x900000
308 #endif
309 
310 #endif
311 
312 static long
313 sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu,
314  int group_fd, unsigned long flags )
315 {
316  int ret;
317 
318  SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, group_fd: %d, flags: %lx)\n", hw_event, pid, cpu, group_fd, flags);
319  SUBDBG(" type: %d\n",hw_event->type);
320  SUBDBG(" size: %d\n",hw_event->size);
321  SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",hw_event->config, hw_event->config);
322  SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period);
323  SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type);
324  SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format);
325  SUBDBG(" disabled: %d\n",hw_event->disabled);
326  SUBDBG(" inherit: %d\n",hw_event->inherit);
327  SUBDBG(" pinned: %d\n",hw_event->pinned);
328  SUBDBG(" exclusive: %d\n",hw_event->exclusive);
329  SUBDBG(" exclude_user: %d\n",hw_event->exclude_user);
330  SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel);
331  SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv);
332  SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle);
333  SUBDBG(" mmap: %d\n",hw_event->mmap);
334  SUBDBG(" comm: %d\n",hw_event->comm);
335  SUBDBG(" freq: %d\n",hw_event->freq);
336  SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat);
337  SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec);
338  SUBDBG(" task: %d\n",hw_event->task);
339  SUBDBG(" watermark: %d\n",hw_event->watermark);
340  SUBDBG(" precise_ip: %d\n",hw_event->precise_ip);
341  SUBDBG(" mmap_data: %d\n",hw_event->mmap_data);
342  SUBDBG(" sample_id_all: %d\n",hw_event->sample_id_all);
343  SUBDBG(" exclude_host: %d\n",hw_event->exclude_host);
344  SUBDBG(" exclude_guest: %d\n",hw_event->exclude_guest);
345  SUBDBG(" exclude_callchain_kernel: %d\n",hw_event->exclude_callchain_kernel);
346  SUBDBG(" exclude_callchain_user: %d\n",hw_event->exclude_callchain_user);
347  SUBDBG(" wakeup_events: %"PRIx32" (%"PRIu32")\n", hw_event->wakeup_events, hw_event->wakeup_events);
348  SUBDBG(" bp_type: %"PRIx32" (%"PRIu32")\n", hw_event->bp_type, hw_event->bp_type);
349  SUBDBG(" config1: %"PRIx64" (%"PRIu64")\n", hw_event->config1, hw_event->config1);
350  SUBDBG(" config2: %"PRIx64" (%"PRIu64")\n", hw_event->config2, hw_event->config2);
351  SUBDBG(" branch_sample_type: %"PRIx64" (%"PRIu64")\n", hw_event->branch_sample_type, hw_event->branch_sample_type);
352  SUBDBG(" sample_regs_user: %"PRIx64" (%"PRIu64")\n", hw_event->sample_regs_user, hw_event->sample_regs_user);
353  SUBDBG(" sample_stack_user: %"PRIx32" (%"PRIu32")\n", hw_event->sample_stack_user, hw_event->sample_stack_user);
354 
355  ret =
356  syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
357  SUBDBG("Returned %d %d %s\n",ret,
358  ret<0?errno:0,
359  ret<0?strerror(errno):" ");
360  return ret;
361 }
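/* Minimal illustrative use of the wrapper above (not component code; */
/* do_work() is a placeholder and error checks are elided): count */
/* user-space instructions in the calling process. */
/* */
/*   struct perf_event_attr attr; */
/*   long long count; */
/*   int fd; */
/* */
/*   memset( &attr, 0, sizeof(attr) ); */
/*   attr.type = PERF_TYPE_HARDWARE; */
/*   attr.size = sizeof(attr); */
/*   attr.config = PERF_COUNT_HW_INSTRUCTIONS; */
/*   attr.disabled = 1; */
/*   attr.exclude_kernel = 1; */
/* */
/*   fd = sys_perf_event_open( &attr, 0, -1, -1, 0 ); */
/*   ioctl( fd, PERF_EVENT_IOC_ENABLE, NULL ); */
/*   do_work(); */
/*   ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ); */
/*   read( fd, &count, sizeof(count) ); */
/*   close( fd ); */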
362 
363 
364 static int map_perf_event_errors_to_papi(int perf_event_error) {
365 
366  int ret;
367 
368  /* These mappings are approximate.
369  EINVAL in particular can mean lots of different things */
370  switch(perf_event_error) {
371  case EPERM:
372  case EACCES:
373  ret = PAPI_EPERM;
374  break;
375  case ENODEV:
376  case EOPNOTSUPP:
377  ret = PAPI_ENOSUPP;
378  break;
379  case ENOENT:
380  ret = PAPI_ENOEVNT;
381  break;
382  case ENOSYS:
383  case EAGAIN:
384  case EBUSY:
385  case E2BIG: /* Only happens if attr is the wrong size somehow */
386  case EBADF: /* We are attempting to group with an invalid file descriptor */
387  ret = PAPI_ESYS;
388  break;
389  case ENOMEM:
390  ret = PAPI_ENOMEM;
391  break;
392  case EMFILE: /* Out of file descriptors. Typically max out at 1024 */
393  ret = PAPI_ECOUNT;
394  break;
395  case EINVAL:
396  default:
397  ret = PAPI_EINVAL;
398  break;
399  }
400  return ret;
401 }
402 
403 
404 /* Check if the current set of options is supported by */
405 /* perf_events. */
406 /* We do this by temporarily opening an event with the */
407 /* desired options then closing it again. We use the */
408 /* PERF_COUNT_HW_INSTRUCTIONS event as a dummy event */
409 /* on the assumption it is available on all */
410 /* platforms. */
411 
412 static int
413 check_permissions( unsigned long tid,
414  unsigned int cpu_num,
415  unsigned int domain,
416  unsigned int granularity,
417  unsigned int multiplex,
418  unsigned int inherit )
419 {
420  int ev_fd;
421  struct perf_event_attr attr;
422 
423  long pid;
424 
425  /* clearing this sets the type to hardware and counts all domains */
426  memset(&attr, '\0', sizeof(attr));
427  attr.read_format = get_read_format(multiplex, inherit, 1);
428 
429  /* set the event id (config field) to instructions */
430  /* (an event that should always exist) */
431  /* This was cycles but that is missing on Niagara */
432  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
433 
434  /* now set up domains this event set will be counting */
435  if (!(domain & PAPI_DOM_SUPERVISOR)) {
436  attr.exclude_hv = 1;
437  }
438  if (!(domain & PAPI_DOM_USER)) {
439  attr.exclude_user = 1;
440  }
441  if (!(domain & PAPI_DOM_KERNEL)) {
442  attr.exclude_kernel = 1;
443  }
444 
445  if (granularity==PAPI_GRN_SYS) {
446  pid = -1;
447  } else {
448  pid = tid;
449  }
450 
451  SUBDBG("Calling sys_perf_event_open() from check_permissions\n");
452 
453  ev_fd = sys_perf_event_open( &attr, pid, cpu_num, -1, 0 );
454  if ( ev_fd == -1 ) {
455  SUBDBG("sys_perf_event_open returned error. Linux says, %s",
456  strerror( errno ) );
457  return map_perf_event_errors_to_papi( errno );
458  }
459 
460  /* now close it, this was just to make sure we have permissions */
461  /* to set these options */
462  close(ev_fd);
463  return PAPI_OK;
464 }
465 
466 /* Maximum size we ever expect to read from a perf_event fd */
467 /* (this is the number of 64-bit values) */
468 /* We use this to size the read buffers */
469 /* The 3 covers the event count, time_enabled, and time_running */
470 /* fields; the 2-per-counter term covers the count value and */
471 /* count id for each possible counter. */
472 #define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))
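/* A sketch of the worst case this buffer must hold: a PERF_FORMAT_GROUP */
/* read with both time fields and per-counter ids enabled returns, per */
/* the perf_event ABI (struct name here is illustrative only): */
/* */
/*   struct read_format_group { */
/*       uint64_t nr;            // number of counters in the group */
/*       uint64_t time_enabled;  // PERF_FORMAT_TOTAL_TIME_ENABLED */
/*       uint64_t time_running;  // PERF_FORMAT_TOTAL_TIME_RUNNING */
/*       struct { uint64_t value; uint64_t id; } cnt[];  // per counter */
/*   }; */
/* */
/* hence 3 + 2 * PERF_EVENT_MAX_MPX_COUNTERS 64-bit values. */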
473 
474 
475 
476 /* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */
477 /* implementations (e.g. x86 before 2.6.33) which don't do a static event */
478 /* scheduability check in sys_perf_event_open. It is also needed if the */
479 /* kernel is stealing an event, such as when NMI watchdog is enabled. */
480 
481 static int
482 check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx )
483 {
484  int retval = 0, cnt = -1;
485  ( void ) ctx; /*unused */
486  long long papi_pe_buffer[READ_BUFFER_SIZE];
487  int i,group_leader_fd;
488 
489  if (bug_check_scheduability()) {
490 
491  /* If the kernel isn't tracking scheduability right */
492  /* Then we need to start/stop/read to force the event */
493  /* to be scheduled and see if an error condition happens. */
494 
495  /* get the proper fd to start */
496  group_leader_fd=ctl->events[idx].group_leader_fd;
497  if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
498 
499  /* start the event */
500  retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
501  if (retval == -1) {
502  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
503  return PAPI_ESYS;
504  }
505 
506  /* stop the event */
507  retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
508  if (retval == -1) {
509  PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
510  return PAPI_ESYS;
511  }
512 
513  /* See if a read returns any results */
514  cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
515  if ( cnt == -1 ) {
516  SUBDBG( "read returned an error! Should never happen.\n" );
517  return PAPI_ESYS;
518  }
519 
520  if ( cnt == 0 ) {
521  /* We read 0 bytes if we could not schedule the event */
522  /* The kernel should have detected this at open */
523  /* but various bugs (including NMI watchdog) */
524  /* result in this behavior */
525 
526  return PAPI_ECNFLCT;
527 
528  } else {
529 
530  /* Reset all of the counters (opened so far) back to zero */
531  /* from the above brief enable/disable call pair. */
532 
533  /* We have to reset all events because reset of group leader */
534  /* does not reset all. */
535  /* we assume that the events are being added one by one and that */
536  /* we do not need to reset higher events (doing so may reset ones */
537  /* that have not been initialized yet). */
538 
539  /* Note... PERF_EVENT_IOC_RESET does not reset time running */
540  /* info if multiplexing, so we should avoid coming here if */
541  /* we are multiplexing the event. */
542  for( i = 0; i < idx; i++) {
543  retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
544  if (retval == -1) {
545  PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
546  "(fd %d) failed",
547  i,ctl->num_events,idx,ctl->events[i].event_fd);
548  return PAPI_ESYS;
549  }
550  }
551  }
552  }
553  return PAPI_OK;
554 }
555 
556 
557 /* Do some extra work on a perf_event fd if we're doing sampling */
558 /* This mostly means setting up the mmap buffer. */
559 static int
560 tune_up_fd( pe_control_t *ctl, int evt_idx )
561 {
562  int ret;
563  void *buf_addr;
564  int fd = ctl->events[evt_idx].event_fd;
565 
566  /* Register that we would like a SIGIO notification when a mmap'd page */
567  /* becomes full. */
568  ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK );
569  if ( ret ) {
570  PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) "
571  "returned error: %s", fd, strerror( errno ) );
572  return PAPI_ESYS;
573  }
574 
575  /* Set the F_SETOWN_EX flag on the fd. */
576  /* This affects which thread an overflow signal gets sent to. */
577  ret=fcntl_setown_fd(fd);
578  if (ret!=PAPI_OK) return ret;
579 
580  /* Set FD_CLOEXEC. Otherwise if we do an exec with an overflow */
581  /* running, the overflow handler will continue into the exec()'d*/
582  /* process and kill it because no signal handler is set up. */
583  ret=fcntl(fd, F_SETFD, FD_CLOEXEC);
584  if (ret) {
585  return PAPI_ESYS;
586  }
587 
588  /* when you explicitly declare that you want a particular signal, */
589  /* even when you use the default signal, the kernel will send more */
590  /* information concerning the event to the signal handler. */
591  /* */
592  /* In particular, it will send the file descriptor from which the */
593  /* event is originating which can be quite useful when monitoring */
594  /* multiple tasks from a single thread. */
595  ret = fcntl( fd, F_SETSIG, ctl->overflow_signal );
596  if ( ret == -1 ) {
597  PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
598  ctl->overflow_signal, fd,
599  strerror( errno ) );
600  return PAPI_ESYS;
601  }
602 
603  /* mmap() the sample buffer */
604  buf_addr = mmap( NULL, ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
605  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
606  if ( buf_addr == MAP_FAILED ) {
607  PAPIERROR( "mmap(NULL,%d,%d,%d,%d,0): %s",
608  ctl->events[evt_idx].nr_mmap_pages * getpagesize( ),
609  PROT_READ, MAP_SHARED, fd, strerror( errno ) );
610  return ( PAPI_ESYS );
611  }
612 
613  SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr );
614 
615  /* Set up the mmap buffer and its associated helpers */
616  ctl->events[evt_idx].mmap_buf = (struct perf_event_mmap_page *) buf_addr;
617  ctl->events[evt_idx].tail = 0;
618  ctl->events[evt_idx].mask = ( ctl->events[evt_idx].nr_mmap_pages - 1 ) *
619  getpagesize() - 1;
620 
621  return PAPI_OK;
622 }
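/* Worked example of the mask computed above, assuming 4 KiB pages and */
/* the kernel's requirement that the data area be a power-of-two number */
/* of pages: with nr_mmap_pages = 9 (1 metadata page + 8 data pages), */
/* */
/*   mask = ( 9 - 1 ) * 4096 - 1 = 32767 = 0x7fff */
/* */
/* so (offset & mask) wraps sample offsets within the 32 KiB data area. */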
623 
624 
625 
626 /* Open all events in the control state */
627 static int
628 open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
629 {
630 
631  int i, ret = PAPI_OK;
632  long pid;
633 
634  if (ctl->granularity==PAPI_GRN_SYS) {
635  pid = -1;
636  }
637  else {
638  pid = ctl->tid;
639  }
640 
641  for( i = 0; i < ctl->num_events; i++ ) {
642 
643  ctl->events[i].event_opened=0;
644 
645  /* set up the attr structure. We don't set up all fields here */
646  /* as some have already been set up previously. */
647 
648  /* group leader (event 0) is special */
649  /* If we're multiplexed, everyone is a group leader */
650  if (( i == 0 ) || (ctl->multiplexed)) {
651  ctl->events[i].attr.pinned = !ctl->multiplexed;
652  ctl->events[i].attr.disabled = 1;
653  ctl->events[i].group_leader_fd=-1;
654  ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed,
655  ctl->inherit,
656  !ctl->multiplexed );
657  } else {
658  ctl->events[i].attr.pinned=0;
659  ctl->events[i].attr.disabled = 0;
660  ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
661  ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed,
662  ctl->inherit,
663  0 );
664  }
665 
666 
667  /* try to open */
668  ctl->events[i].event_fd = sys_perf_event_open( &ctl->events[i].attr,
669  pid,
670  ctl->events[i].cpu,
671  ctl->events[i].group_leader_fd,
672  0 /* flags */
673  );
674 
675  /* Try to match Linux errors to PAPI errors */
676  if ( ctl->events[i].event_fd == -1 ) {
677  SUBDBG("sys_perf_event_open returned error on event #%d."
678  " Error: %s\n",
679  i, strerror( errno ) );
680  ret = map_perf_event_errors_to_papi( errno );
681 
682  goto open_pe_cleanup;
683  }
684 
685  SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
686  " group_leader/fd: %d, event_fd: %d,"
687  " read_format: %"PRIu64"\n",
688  pid, ctl->events[i].cpu, ctl->events[i].group_leader_fd,
689  ctl->events[i].event_fd, ctl->events[i].attr.read_format);
690 
691 
692  /* in many situations the kernel will indicate we opened fine */
693  /* yet things will fail later. So we need to double check */
694  /* we actually can use the events we've set up. */
695 
696  /* This is not necessary if we are multiplexing, and in fact */
697  /* we cannot do this properly if multiplexed because */
698  /* PERF_EVENT_IOC_RESET does not reset the time running info */
699  if (!ctl->multiplexed) {
700  ret = check_scheduability( ctx, ctl, i );
701 
702  if ( ret != PAPI_OK ) {
703  /* the last event did open, so we need to bump the counter */
704  /* before doing the cleanup */
705  i++;
706  goto open_pe_cleanup;
707  }
708  }
709  ctl->events[i].event_opened=1;
710  }
711 
712  /* Now that we've successfully opened all of the events, do whatever */
713  /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
714  /* and so on. */
715  for ( i = 0; i < ctl->num_events; i++ ) {
716 
717  /* If sampling is enabled, hook up signal handler */
718  if ((ctl->events[i].attr.sample_period) && (ctl->events[i].nr_mmap_pages > 0)) {
719  ret = tune_up_fd( ctl, i );
720  if ( ret != PAPI_OK ) {
721  /* All of the fds are open, so we need to clean up all of them */
722  i = ctl->num_events;
723  goto open_pe_cleanup;
724  }
725  } else {
726  /* Make sure this is NULL so close_pe_events works right */
727  ctl->events[i].mmap_buf = NULL;
728  }
729  }
730 
731  /* Mark the context as opened only if completely successful */
732  ctx->state |= PERF_EVENTS_OPENED;
733 
734  return PAPI_OK;
735 
736 open_pe_cleanup:
737  /* We encountered an error, close up the fds we successfully opened. */
738  /* We go backward in an attempt to close group leaders last, although */
739  /* That's probably not strictly necessary. */
740  while ( i > 0 ) {
741  i--;
742  if (ctl->events[i].event_fd>=0) {
743  close( ctl->events[i].event_fd );
744  ctl->events[i].event_opened=0;
745  }
746  }
747 
748  return ret;
749 }
750 
751 /* Close all of the opened events */
752 static int
753 close_pe_events( pe_context_t *ctx, pe_control_t *ctl )
754 {
755  int i;
756  int num_closed=0;
757  int events_not_opened=0;
758 
759  /* should this be a more serious error? */
760  if ( ctx->state & PERF_EVENTS_RUNNING ) {
761  SUBDBG("Closing without stopping first\n");
762  }
763 
764  /* Close child events first */
765  for( i=0; i<ctl->num_events; i++ ) {
766 
767  if (ctl->events[i].event_opened) {
768 
769  if (ctl->events[i].group_leader_fd!=-1) {
770  if ( ctl->events[i].mmap_buf ) {
771  if ( munmap ( ctl->events[i].mmap_buf,
772  ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
773  PAPIERROR( "munmap of fd = %d returned error: %s",
774  ctl->events[i].event_fd, strerror( errno ) );
775  return PAPI_ESYS;
776  }
777  }
778 
779  if ( close( ctl->events[i].event_fd ) ) {
780  PAPIERROR( "close of fd = %d returned error: %s",
781  ctl->events[i].event_fd, strerror( errno ) );
782  return PAPI_ESYS;
783  } else {
784  num_closed++;
785  }
786  ctl->events[i].event_opened=0;
787  }
788  }
789  else {
790  events_not_opened++;
791  }
792  }
793 
794  /* Close the group leaders last */
795  for( i=0; i<ctl->num_events; i++ ) {
796 
797  if (ctl->events[i].event_opened) {
798 
799  if (ctl->events[i].group_leader_fd==-1) {
800  if ( ctl->events[i].mmap_buf ) {
801  if ( munmap ( ctl->events[i].mmap_buf,
802  ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
803  PAPIERROR( "munmap of fd = %d returned error: %s",
804  ctl->events[i].event_fd, strerror( errno ) );
805  return PAPI_ESYS;
806  }
807  }
808 
809 
810  if ( close( ctl->events[i].event_fd ) ) {
811  PAPIERROR( "close of fd = %d returned error: %s",
812  ctl->events[i].event_fd, strerror( errno ) );
813  return PAPI_ESYS;
814  } else {
815  num_closed++;
816  }
817  ctl->events[i].event_opened=0;
818  }
819  }
820  }
821 
822 
823  if (ctl->num_events!=num_closed) {
824  if (ctl->num_events!=(num_closed+events_not_opened)) {
825  PAPIERROR("Didn't close all events: "
826  "Closed %d Not Opened: %d Expected %d",
827  num_closed,events_not_opened,ctl->num_events);
828  return PAPI_EBUG;
829  }
830  }
831 
832  ctl->num_events=0;
833 
834  ctx->state &= ~PERF_EVENTS_OPENED;
835 
836  return PAPI_OK;
837 }
838 
839 
840 /********************************************************************/
841 /********************************************************************/
842 /* Functions that are exported via the component interface */
843 /********************************************************************/
844 /********************************************************************/
845 
846 
847 /* Set the domain. perf_events allows per-event control of this; PAPI allows it to be set at the event level or at the event set level. */
848 /* This sets the event set level domain values, but they are only used if no event level domain mask (u= or k=) was specified. */
849 static int
850 _pe_set_domain( hwd_control_state_t *ctl, int domain)
851 {
852  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
853 
854  SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain,domain);
855  pe_ctl->domain = domain;
856  return PAPI_OK;
857 }
858 
859 /* Shutdown a thread */
860 int
861 _pe_shutdown_thread( hwd_context_t *ctx )
862 {
863  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
864 
865  pe_ctx->initialized=0;
866 
867  return PAPI_OK;
868 }
869 
870 
871 /* reset the hardware counters */
872 /* Note: PAPI_reset() does not necessarily call this */
873 /* unless the events are actually running. */
874 int
875 _pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
876 {
877  int i, ret;
878  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
879 
880  ( void ) ctx; /*unused */
881 
882  /* We need to reset all of the events, not just the group leaders */
883  for( i = 0; i < pe_ctl->num_events; i++ ) {
884  ret = ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
885  if ( ret == -1 ) {
886  PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
887  "returned error, Linux says: %s",
888  pe_ctl->events[i].event_fd, strerror( errno ) );
889  return PAPI_ESYS;
890  }
891  }
892 
893  return PAPI_OK;
894 }
895 
896 
897 /* write (set) the hardware counters */
898 /* Currently we do not support this. */
899 int
900 _pe_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
901  long long *from )
902 {
903  ( void ) ctx; /*unused */
904  ( void ) ctl; /*unused */
905  ( void ) from; /*unused */
906  /*
907  * Counters cannot be written. Do we need to virtualize the
908  * counters so that they can be written, or perhaps modify code so that
909  * they can be written? FIXME ?
910  */
911 
912  return PAPI_ENOSUPP;
913 }
914 
915 /*
916  * perf_event provides a complicated read interface.
917  * the info returned by read() varies depending on whether
918  * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
919  * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set
920  *
921  * To simplify things we just always ask for everything. This might
922  * lead to overhead when reading more than we need, but it makes the
923  * read code a lot simpler than the original implementation we had here.
924  *
925  * For more info on the layout see include/linux/perf_event.h
926  *
927  */
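/* Concretely, the three read() layouts handled below are (every */
/* field a 64-bit value): */
/* */
/*   multiplexed (one event per fd):  value, time_enabled, time_running */
/*   no FORMAT_GROUP (bug/inherit):   value */
/*   FORMAT_GROUP (group leader fd):  nr, value[0], ..., value[nr-1] */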
928 
929 int
930 _pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
931  long long **events, int flags )
932 {
933  SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n", ctx, ctl, events, flags);
934 
935  ( void ) flags; /*unused */
936  int i, ret = -1;
937  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
938  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
939  long long papi_pe_buffer[READ_BUFFER_SIZE];
940  long long tot_time_running, tot_time_enabled, scale;
941 
942  /* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and TOTAL_TIME_RUNNING */
943  /* fields are always 0 unless the counter is disabled. So if we are on */
944  /* one of these kernels, then we must disable events before reading. */
945 
946  /* Elsewhere though we disable multiplexing on kernels before 2.6.34 */
947  /* so maybe this isn't even necessary. */
948 
949  if (bug_sync_read()) {
950  if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
951  for ( i = 0; i < pe_ctl->num_events; i++ ) {
952  /* disable only the group leaders */
953  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
954  ret = ioctl( pe_ctl->events[i].event_fd,
955  PERF_EVENT_IOC_DISABLE, NULL );
956  if ( ret == -1 ) {
957  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) "
958  "returned an error: %s", strerror( errno ));
959  return PAPI_ESYS;
960  }
961  }
962  }
963  }
964  }
965 
966 
967  /* Handle case where we are multiplexing */
968  if (pe_ctl->multiplexed) {
969 
970  /* currently we handle multiplexing by having individual events */
971  /* so we read from each in turn. */
972 
973  for ( i = 0; i < pe_ctl->num_events; i++ ) {
974 
975  ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
976  sizeof ( papi_pe_buffer ) );
977  if ( ret == -1 ) {
978  PAPIERROR("read returned an error: %s", strerror( errno ));
979  return PAPI_ESYS;
980  }
981 
982  /* We should read 3 64-bit values from the counter */
983  if (ret<(signed)(3*sizeof(long long))) {
984  PAPIERROR("Error! short read");
985  return PAPI_ESYS;
986  }
987 
988  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
989  pe_ctl->events[i].event_fd,
990  (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
991  SUBDBG("read: %lld %lld %lld\n",papi_pe_buffer[0],
992  papi_pe_buffer[1],papi_pe_buffer[2]);
993 
994  tot_time_enabled = papi_pe_buffer[1];
995  tot_time_running = papi_pe_buffer[2];
996 
997  SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
998  "tot_time_enabled %lld) / tot_time_running %lld\n",
999  i, 0,papi_pe_buffer[0],
1000  tot_time_enabled,tot_time_running);
1001 
1002  if (tot_time_running == tot_time_enabled) {
1003  /* No scaling needed */
1004  pe_ctl->counts[i] = papi_pe_buffer[0];
1005  } else if (tot_time_running && tot_time_enabled) {
1006  /* Scale factor of 100 to avoid overflows when computing */
1007  /*enabled/running */
1008 
1009  scale = (tot_time_enabled * 100LL) / tot_time_running;
1010  scale = scale * papi_pe_buffer[0];
1011  scale = scale / 100LL;
1012  pe_ctl->counts[i] = scale;
1013  } else {
1014  /* This should not happen, but Phil reports it sometimes does. */
1015  SUBDBG("perf_event kernel bug(?) count, enabled, "
1016  "running: %lld, %lld, %lld\n",
1017  papi_pe_buffer[0],tot_time_enabled,
1018  tot_time_running);
1019 
1020  pe_ctl->counts[i] = papi_pe_buffer[0];
1021  }
1022  }
1023  }
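/* Worked example of the scaling above: if an event was scheduled on */
/* the PMU for only half the time it was enabled, e.g. */
/* papi_pe_buffer[0] = 400000, tot_time_enabled = 2000000, */
/* tot_time_running = 1000000, then */
/* */
/*   scale = (2000000 * 100) / 1000000 = 200 */
/*   count = (200 * 400000) / 100 = 800000 */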
1024 
1025  /* Handle cases where we cannot use FORMAT GROUP */
1026  else if (bug_format_group() || pe_ctl->inherit) {
1027 
1028  /* we must read each counter individually */
1029  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1030 
1031  ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
1032  sizeof ( papi_pe_buffer ) );
1033  if ( ret == -1 ) {
1034  PAPIERROR("read returned an error: %s", strerror( errno ));
1035  return PAPI_ESYS;
1036  }
1037 
1038  /* we should read one 64-bit value from each counter */
1039  if (ret!=sizeof(long long)) {
1040  PAPIERROR("Error! short read");
1041  PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d",
1042  pe_ctl->events[i].event_fd,
1043  (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
1044  return PAPI_ESYS;
1045  }
1046 
1047  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1048  pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
1049  pe_ctl->events[i].cpu, ret);
1050  SUBDBG("read: %lld\n",papi_pe_buffer[0]);
1051 
1052  pe_ctl->counts[i] = papi_pe_buffer[0];
1053  }
1054  }
1055 
1056 
1057  /* Handle cases where we are using FORMAT_GROUP */
1058  /* We assume only one group leader, in position 0 */
1059 
1060  else {
1061  if (pe_ctl->events[0].group_leader_fd!=-1) {
1062  PAPIERROR("Was expecting group leader");
1063  }
1064 
1065  ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer,
1066  sizeof ( papi_pe_buffer ) );
1067 
1068  if ( ret == -1 ) {
1069  PAPIERROR("read returned an error: %s", strerror( errno ));
1070  return PAPI_ESYS;
1071  }
1072 
1073  /* we read 1 64-bit value (number of events) then */
1074  /* num_events more 64-bit values that hold the counts */
1075  if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
1076  PAPIERROR("Error! short read");
1077  return PAPI_ESYS;
1078  }
1079 
1080  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1081  pe_ctl->events[0].event_fd,
1082  (long)pe_ctl->tid, pe_ctl->events[0].cpu, ret);
1083  {
1084  int j;
1085  for(j=0;j<ret/8;j++) {
1086  SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
1087  }
1088  }
1089 
1090  /* Make sure the kernel agrees with how many events we have */
1091  if (papi_pe_buffer[0]!=pe_ctl->num_events) {
1092  PAPIERROR("Error! Wrong number of events");
1093  return PAPI_ESYS;
1094  }
1095 
1096  /* put the count values in their proper location */
1097  for(i=0;i<pe_ctl->num_events;i++) {
1098  pe_ctl->counts[i] = papi_pe_buffer[1+i];
1099  }
1100  }
1101 
1102 
1103  /* If we disabled the counters due to bug_sync_read(), */
1104  /* then we need to re-enable them now. */
1105  if (bug_sync_read()) {
1106  if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
1107  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1108  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1109  /* this should refresh any overflow counters too */
1110  ret = ioctl( pe_ctl->events[i].event_fd,
1111  PERF_EVENT_IOC_ENABLE, NULL );
1112  if ( ret == -1 ) {
1113  /* Should never happen */
1114  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) returned an error: %s",
1115  strerror( errno ));
1116  return PAPI_ESYS;
1117  }
1118  }
1119  }
1120  }
1121  }
1122 
1123  /* point PAPI to the values we read */
1124  *events = pe_ctl->counts;
1125 
1126  SUBDBG("EXIT: *events: %p\n", *events);
1127  return PAPI_OK;
1128 }
1129 
1130 /* Start counting events */
1131 int
1132 _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
1133 {
1134  int ret;
1135  int i;
1136  int did_something = 0;
1137  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1138  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1139 
1140  /* Reset the counters first. Is this necessary? */
1141  ret = _pe_reset( pe_ctx, pe_ctl );
1142  if ( ret ) {
1143  return ret;
1144  }
1145 
1146  /* Enable all of the group leaders */
1147  /* All group leaders have a group_leader_fd of -1 */
1148  for( i = 0; i < pe_ctl->num_events; i++ ) {
1149  if (pe_ctl->events[i].group_leader_fd == -1) {
1150  SUBDBG("ioctl(enable): fd: %d\n", pe_ctl->events[i].event_fd);
1151  ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_ENABLE, NULL) ;
1152 
1153  /* ioctls always return -1 on failure */
1154  if (ret == -1) {
1155  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
1156  return PAPI_ESYS;
1157  }
1158 
1159  did_something++;
1160  }
1161  }
1162 
1163  if (!did_something) {
1164  PAPIERROR("Did not enable any counters");
1165  return PAPI_EBUG;
1166  }
1167 
1168  pe_ctx->state |= PERF_EVENTS_RUNNING;
1169 
1170  return PAPI_OK;
1171 
1172 }
1173 
1174 /* Stop all of the counters */
1175 int
1176 _pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
1177 {
1178  SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl);
1179 
1180  int ret;
1181  int i;
1182  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1183  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1184 
1185  /* Just disable the group leaders */
1186  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1187  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1188  ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_DISABLE, NULL);
1189  if ( ret == -1 ) {
1190  PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
1191  "returned error, Linux says: %s",
1192  pe_ctl->events[i].event_fd, strerror( errno ) );
1193  return PAPI_EBUG;
1194  }
1195  }
1196  }
1197 
1198  pe_ctx->state &= ~PERF_EVENTS_RUNNING;
1199 
1200  SUBDBG( "EXIT:\n");
1201  return PAPI_OK;
1202 }
1203 
1204 /* This function clears the current contents of the control structure and
1205  updates it with whatever resources are allocated for all the native events
1206  in the native info structure array. */
1207 
1208 int
1209 _pe_update_control_state( hwd_control_state_t *ctl,
1210  NativeInfo_t *native,
1211  int count, hwd_context_t *ctx )
1212 {
1213  SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n", ctl, native, count, ctx);
1214  int i;
1215  int j;
1216  int ret;
1217  int skipped_events=0;
1218  struct native_event_t *ntv_evt;
1219  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1220  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1221 
1222  /* close all of the existing fds and start over again */
1223  /* In theory we could have finer-grained control and know if */
1224  /* things were changed, but it's easier to tear things down and rebuild. */
1225  close_pe_events( pe_ctx, pe_ctl );
1226 
1227  /* Calling with count==0 should be OK, it's how things are deallocated */
1228  /* when an eventset is destroyed. */
1229  if ( count == 0 ) {
1230  SUBDBG( "EXIT: Called with count == 0\n" );
1231  return PAPI_OK;
1232  }
1233 
1234  /* set up all the events */
1235  for( i = 0; i < count; i++ ) {
1236  if ( native ) {
1237  // get the native event pointer used for this papi event
1238  int ntv_idx = _papi_hwi_get_ntv_idx((unsigned)(native[i].ni_papi_code));
1239  if (ntv_idx < -1) {
1240 SUBDBG("papi_event_code: %#x known by papi but not by the component\n", native[i].ni_papi_code);
1241 skipped_events++; continue;
1242  }
1243  // if native index is -1, then we have an event without a mask and need to find the right native index to use
1244  if (ntv_idx == -1) {
1245  // find the native event index we want by matching for the right papi event code
1246  for (j=0 ; j<pe_ctx->event_table->num_native_events ; j++) {
1247  if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) {
1248  ntv_idx = j;
1249  }
1250  }
1251  }
1252 
1253 // if native index is still negative, we did not find the event we wanted, so skip it
1254 if (ntv_idx < 0) {
1255 SUBDBG("papi_event_code: %#x not found in native event tables\n", native[i].ni_papi_code);
1256 skipped_events++; continue;
1257  }
1258 
1259  // this native index is positive so there was a mask with the event, the ntv_idx identifies which native event to use
1260  ntv_evt = (struct native_event_t *)(&(pe_ctx->event_table->native_events[ntv_idx]));
1261  SUBDBG("ntv_evt: %p\n", ntv_evt);
1262 
1263  SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n", i, pe_ctx->event_table->num_native_events);
1264 
1265 // Move this event's hardware config values and other attributes to the perf_events attribute structure
1266  memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr, sizeof(perf_event_attr_t));
1267 
1268  // may need to update the attribute structure with information from event set level domain settings (values set by PAPI_set_domain)
1269  // only done if the event mask which controls each counting domain was not provided
1270 
1271  // get pointer to allocated name, will be NULL when adding preset events to event set
1272  char *aName = ntv_evt->allocated_name;
1273  if ((aName == NULL) || (strstr(aName, ":u=") == NULL)) {
1274  SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_user, !(pe_ctl->domain & PAPI_DOM_USER));
1275  pe_ctl->events[i].attr.exclude_user = !(pe_ctl->domain & PAPI_DOM_USER);
1276  }
1277  if ((aName == NULL) || (strstr(aName, ":k=") == NULL)) {
1278  SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_kernel, !(pe_ctl->domain & PAPI_DOM_KERNEL));
1279  pe_ctl->events[i].attr.exclude_kernel = !(pe_ctl->domain & PAPI_DOM_KERNEL);
1280  }
1281 
1282  // libpfm4 supports mh (monitor host) and mg (monitor guest) event masks
1283  // perf_events supports exclude_hv and exclude_idle attributes
1284  // PAPI_set_domain supports PAPI_DOM_SUPERVISOR and PAPI_DOM_OTHER domain attributes
1285  // not sure how these perf_event attributes, and PAPI domain attributes relate to each other
1286  // if that can be figured out then there should probably be code here to set some perf_events attributes based on what was set in a PAPI_set_domain call
1287  // the code sample below is one possibility
1288 // if (strstr(ntv_evt->allocated_name, ":mg=") == NULL) {
1289 // SUBDBG("set exclude_hv attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_hv, !(pe_ctl->domain & PAPI_DOM_SUPERVISOR));
1290 // pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
1291 // }
1292 
1293 
1294  // set the cpu number provided with an event mask if there was one (will be -1 if mask not provided)
1295  pe_ctl->events[i].cpu = ntv_evt->cpu;
1296  // if cpu event mask not provided, then set the cpu to use to what may have been set on call to PAPI_set_opt (will still be -1 if not called)
1297  if (pe_ctl->events[i].cpu == -1) {
1298  pe_ctl->events[i].cpu = pe_ctl->cpu;
1299  }
1300  } else {
1301  // This case happens when called from _pe_set_overflow and _pe_ctl
1302  // Those callers put things directly into the pe_ctl structure so it is already set for the open call
1303  }
1304 
1305  // Copy the inherit flag into the attribute block that will be passed to the kernel
1306  pe_ctl->events[i].attr.inherit = pe_ctl->inherit;
1307 
1308  /* Set the position in the native structure */
1309  /* We just set up events linearly */
1310  if ( native ) {
1311  native[i].ni_position = i;
1312  SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n",
1313  i, &(native[i]), native[i].ni_papi_code, native[i].ni_event, native[i].ni_position, native[i].ni_owners);
1314  }
1315  }
1316 
1317  if (count <= skipped_events) {
1318  SUBDBG("EXIT: No events to count, they all contained invalid umasks\n");
1319  return PAPI_ENOEVNT;
1320  }
1321 
1322  pe_ctl->num_events = count - skipped_events;
1323 
1324  /* actually open the events */
1325  /* (why is this a separate function?) */
1326  ret = open_pe_events( pe_ctx, pe_ctl );
1327  if ( ret != PAPI_OK ) {
1328  SUBDBG("EXIT: open_pe_events returned: %d\n", ret);
1329  /* Restore values ? */
1330  return ret;
1331  }
1332 
1333  SUBDBG( "EXIT: PAPI_OK\n" );
1334  return PAPI_OK;
1335 }
1336 
1337 /* Set various options on a control state */
1338 int
1339 _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
1340 {
1341  int ret;
1342  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1343  pe_control_t *pe_ctl = NULL;
1344 
1345  switch ( code ) {
1346  case PAPI_MULTIPLEX:
1347  pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
1348  ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
1349  pe_ctl->granularity,
1350  1, pe_ctl->inherit );
1351  if (ret != PAPI_OK) {
1352  return ret;
1353  }
1354 
1355  /* looks like we are allowed, so set multiplexed attribute */
1356  pe_ctl->multiplexed = 1;
1357  ret = _pe_update_control_state( pe_ctl, NULL,
1358  pe_ctl->num_events, pe_ctx );
1359  if (ret != PAPI_OK) {
1360  pe_ctl->multiplexed = 0;
1361  }
1362  return ret;
1363 
1364  case PAPI_ATTACH:
1365  pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
1366  ret = check_permissions( option->attach.tid, pe_ctl->cpu,
1367  pe_ctl->domain, pe_ctl->granularity,
1368  pe_ctl->multiplexed,
1369  pe_ctl->inherit );
1370  if (ret != PAPI_OK) {
1371  return ret;
1372  }
1373 
1374  pe_ctl->tid = option->attach.tid;
1375 
1376  /* If events have been already been added, something may */
1377  /* have been done to the kernel, so update */
1378  ret =_pe_update_control_state( pe_ctl, NULL,
1379  pe_ctl->num_events, pe_ctx);
1380 
1381  return ret;
1382 
1383  case PAPI_DETACH:
1384  pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );
1385 
1386  pe_ctl->tid = 0;
1387  return PAPI_OK;
1388 
1389  case PAPI_CPU_ATTACH:
1390  pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
1391  ret = check_permissions( pe_ctl->tid, option->cpu.cpu_num,
1392  pe_ctl->domain, pe_ctl->granularity,
1393  pe_ctl->multiplexed,
1394  pe_ctl->inherit );
1395  if (ret != PAPI_OK) {
1396  return ret;
1397  }
1398  /* looks like we are allowed so set cpu number */
1399 
1400  /* this tells the kernel not to count for a thread */
1401  /* should we warn if we try to set both? perf_event */
1402  /* will reject it. */
1403  pe_ctl->tid = -1;
1404 
1405  pe_ctl->cpu = option->cpu.cpu_num;
1406 
1407  return PAPI_OK;
1408 
1409  case PAPI_DOMAIN:
1410  pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
1411  ret = check_permissions( pe_ctl->tid, pe_ctl->cpu,
1412  option->domain.domain,
1413  pe_ctl->granularity,
1414  pe_ctl->multiplexed,
1415  pe_ctl->inherit );
1416  if (ret != PAPI_OK) {
1417  return ret;
1418  }
1419  /* looks like we are allowed, so set event set level counting domains */
1420  pe_ctl->domain = option->domain.domain;
1421  return PAPI_OK;
1422 
1423  case PAPI_GRANUL:
1424  pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );
1425 
1426  /* FIXME: we really don't support this yet */
1427 
1428  switch ( option->granularity.granularity ) {
1429  case PAPI_GRN_PROCG:
1430  case PAPI_GRN_SYS_CPU:
1431  case PAPI_GRN_PROC:
1432  return PAPI_ECMP;
1433 
1434  /* Currently we only support thread and CPU granularity */
1435  case PAPI_GRN_SYS:
1436  pe_ctl->granularity=PAPI_GRN_SYS;
1437  break;
1438 
1439  case PAPI_GRN_THR:
1440  pe_ctl->granularity=PAPI_GRN_THR;
1441  break;
1442 
1443 
1444  default:
1445  return PAPI_EINVAL;
1446  }
1447  return PAPI_OK;
1448 
1449  case PAPI_INHERIT:
1450  pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
1451  ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
1452  pe_ctl->granularity, pe_ctl->multiplexed,
1453  option->inherit.inherit );
1454  if (ret != PAPI_OK) {
1455  return ret;
1456  }
1457  /* looks like we are allowed, so set the requested inheritance */
1458  if (option->inherit.inherit) {
1459  /* children will inherit counters */
1460  pe_ctl->inherit = 1;
1461  } else {
1462  /* children won't inherit counters */
1463  pe_ctl->inherit = 0;
1464  }
1465  return PAPI_OK;
1466 
1467  case PAPI_DATA_ADDRESS:
1468  return PAPI_ENOSUPP;
1469 #if 0
1470  pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
1471  ret = set_default_domain( pe_ctl, option->address_range.domain );
1472  if ( ret != PAPI_OK ) {
1473  return ret;
1474  }
1475  set_drange( pe_ctx, pe_ctl, option );
1476  return PAPI_OK;
1477 #endif
1478  case PAPI_INSTR_ADDRESS:
1479  return PAPI_ENOSUPP;
1480 #if 0
1481  pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
1482  ret = set_default_domain( pe_ctl, option->address_range.domain );
1483  if ( ret != PAPI_OK ) {
1484  return ret;
1485  }
1486  set_irange( pe_ctx, pe_ctl, option );
1487  return PAPI_OK;
1488 #endif
1489 
1490  case PAPI_DEF_ITIMER:
1491  /* What should we be checking for here? */
1492  /* This seems like it should be OS-specific not component */
1493  /* specific. */
1494 
1495  return PAPI_OK;
1496 
1497  case PAPI_DEF_MPX_NS:
1498  /* Defining a given ns per set is not currently supported */
1499  return PAPI_ENOSUPP;
1500 
1501  case PAPI_DEF_ITIMER_NS:
1502  /* We don't support this... */
1503  return PAPI_OK;
1504 
1505  default:
1506  return PAPI_ENOSUPP;
1507  }
1508 }
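/* For orientation, a sketch of how a PAPI user reaches the */
/* PAPI_CPU_ATTACH case above (illustrative; EventSet is an existing */
/* event set and error checks are elided): */
/* */
/*   PAPI_option_t opt; */
/* */
/*   memset( &opt, 0, sizeof(opt) ); */
/*   opt.cpu.eventset = EventSet; */
/*   opt.cpu.cpu_num = 2;           // count only on CPU 2 */
/*   PAPI_set_opt( PAPI_CPU_ATTACH, &opt ); */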
1509 
1510 /* Initialize a thread */
1511 int
1512 _pe_init_thread( hwd_context_t *hwd_ctx )
1513 {
1514 
1515  pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;
1516 
1517  /* clear the context structure and mark as initialized */
1518  memset( pe_ctx, 0, sizeof ( pe_context_t ) );
1519  pe_ctx->initialized=1;
1520  pe_ctx->event_table=&perf_native_event_table;
1521  pe_ctx->cidx=our_cidx;
1522 
1523  return PAPI_OK;
1524 }
1525 
1526 /* Initialize a new control state */
1527 int
1528 _pe_init_control_state( hwd_control_state_t *ctl )
1529 {
1530  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1531 
1532  /* clear the contents */
1533  memset( pe_ctl, 0, sizeof ( pe_control_t ) );
1534 
1535  /* Set the domain */
1536  _pe_set_domain( ctl, _perf_event_vector.cmp_info.default_domain );
1537 
1538  /* default granularity */
1539  pe_ctl->granularity= _perf_event_vector.cmp_info.default_granularity;
1540 
1541  /* overflow signal */
1542  pe_ctl->overflow_signal=_perf_event_vector.cmp_info.hardware_intr_sig;
1543 
1544  pe_ctl->cidx=our_cidx;
1545 
1546  /* Set cpu number in the control block to show events */
1547  /* are not tied to specific cpu */
1548  pe_ctl->cpu = -1;
1549  return PAPI_OK;
1550 }
1551 
1552 /* Check the mmap page for rdpmc support */
1553 static int _pe_detect_rdpmc(int default_domain) {
1554 
1555  struct perf_event_attr pe;
1556  int fd,rdpmc_exists=1;
1557  void *addr;
1558  struct perf_event_mmap_page *our_mmap;
1559 
1560  /* Create a fake instructions event so we can read a mmap page */
1561  memset(&pe,0,sizeof(struct perf_event_attr));
1562 
1563  pe.type=PERF_TYPE_HARDWARE;
1564  pe.size=sizeof(struct perf_event_attr);
1565  pe.config=PERF_COUNT_HW_INSTRUCTIONS;
1566 
1567  /* There should probably be a helper function to handle this */
1568  /* we break on some ARM because there is no support for excluding */
1569  /* kernel. */
1570  if (default_domain & PAPI_DOM_KERNEL ) {
1571  }
1572  else {
1573  pe.exclude_kernel=1;
1574  }
1575  fd=sys_perf_event_open(&pe,0,-1,-1,0);
1576  if (fd<0) {
1577  return PAPI_ESYS;
1578  }
1579 
1580  /* create the mmap page */
1581  addr=mmap(NULL, 4096, PROT_READ, MAP_SHARED,fd,0);
1582  if (addr == (void *)(-1)) {
1583  close(fd);
1584  return PAPI_ESYS;
1585  }
1586 
1587  /* get the rdpmc info */
1588  our_mmap=(struct perf_event_mmap_page *)addr;
1589  if (our_mmap->cap_usr_rdpmc==0) {
1590  rdpmc_exists=0;
1591  }
1592 
1593  /* close the fake event */
1594  munmap(addr,4096);
1595  close(fd);
1596 
1597  return rdpmc_exists;
1598 
1599 }
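/* Background sketch (x86-specific, illustrative): when cap_usr_rdpmc */
/* is set, a counter can be read from user space without a syscall, */
/* roughly (seqlock handling via the mmap page's lock field omitted): */
/* */
/*   uint32_t idx = our_mmap->index;              // hw index + 1 */
/*   if ( idx ) { */
/*       uint64_t val = __builtin_ia32_rdpmc( idx - 1 ); */
/*       val += our_mmap->offset;                 // kernel base value */
/*   } */
/* */
/* As noted in _pe_init_component() below, PAPI does not currently */
/* use this path because plain read() measured faster at the time. */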
1600 
1601 
1602 /* Initialize the perf_event component */
1603 int
1604 _pe_init_component( int cidx )
1605 {
1606 
1607  int retval;
1608  int paranoid_level;
1609 
1610  FILE *fff;
1611 
1612  our_cidx=cidx;
1613 
1614  /* This is the official way to detect if perf_event support exists */
1615  /* The file is called perf_counter_paranoid on 2.6.31 */
1616  /* currently we are lazy and do not support 2.6.31 kernels */
1617  fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
1618  if (fff==NULL) {
1619  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
1620  "perf_event support not detected",PAPI_MAX_STR_LEN);
1621  return PAPI_ENOCMP;
1622  }
1623 
1624  /* 2 means no kernel measurements allowed */
1625  /* 1 means normal counter access */
1626  /* 0 means you can access CPU-specific data */
1627  /* -1 means no restrictions */
1628  retval=fscanf(fff,"%d",&paranoid_level);
1629  if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
1630  fclose(fff);
1631 
1632  if ((paranoid_level==2) && (getuid()!=0)) {
1633  SUBDBG("/proc/sys/kernel/perf_event_paranoid prohibits kernel counts");
1634  _papi_hwd[cidx]->cmp_info.available_domains &= ~PAPI_DOM_KERNEL;
1635  }
1636 
1637  /* Detect NMI watchdog which can steal counters */
1638  nmi_watchdog_active=_linux_detect_nmi_watchdog();
1639  if (nmi_watchdog_active) {
1640  SUBDBG("The Linux nmi_watchdog is using one of the performance "
1641  "counters, reducing the total number available.\n");
1642  }
1643  /* Kernel multiplexing is broken prior to kernel 2.6.34 */
1644  /* The fix was probably git commit: */
1645  /* 45e16a6834b6af098702e5ea6c9a40de42ff77d8 */
1646  if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
1647  _papi_hwd[cidx]->cmp_info.kernel_multiplex = 0;
1648  _papi_hwd[cidx]->cmp_info.num_mpx_cntrs = PAPI_MAX_SW_MPX_EVENTS;
1649  }
1650  else {
1651  _papi_hwd[cidx]->cmp_info.kernel_multiplex = 1;
1652  _papi_hwd[cidx]->cmp_info.num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS;
1653  }
1654 
1655  /* Check that processor is supported */
1656  if (processor_supported(_papi_hwi_system_info.hw_info.vendor,
1657  _papi_hwi_system_info.hw_info.cpuid_family)!=
1658  PAPI_OK) {
1659  fprintf(stderr,"warning, your processor is unsupported\n");
1660  /* should not return error, as software events should still work */
1661  }
1662 
1663  /* Setup mmtimers, if appropriate */
1664  retval=mmtimer_setup();
1665  if (retval) {
1666  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
1667  "Error initializing mmtimer",PAPI_MAX_STR_LEN);
1668  return retval;
1669  }
1670 
1671  /* Set the overflow signal */
1672  _papi_hwd[cidx]->cmp_info.hardware_intr_sig = SIGRTMIN + 2;
1673 
1674  /* Run Vendor-specific fixups */
1675  pe_vendor_fixups(_papi_hwd[cidx]);
1676 
1677  /* Detect if we can use rdpmc (or equivalent) */
1678  /* We currently do not use rdpmc as it is slower in tests */
1679  /* than regular read (as of Linux 3.5) */
1680  retval=_pe_detect_rdpmc(_papi_hwd[cidx]->cmp_info.default_domain);
1681  if (retval < 0 ) {
1682  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
1683  "sys_perf_event_open() failed, perf_event support for this platform may be broken",PAPI_MAX_STR_LEN);
1684 
1685  return retval;
1686  }
1688 
1689  /* Run the libpfm4-specific setup */
1690  retval = _papi_libpfm4_init(_papi_hwd[cidx]);
1691  if (retval) {
1692  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
1693  "Error initializing libpfm4",PAPI_MAX_STR_LEN);
1694  return retval;
1695  }
1696 
1697  retval = _pe_libpfm4_init(_papi_hwd[cidx], cidx,
1698  &perf_native_event_table,
1699  PMU_TYPE_CORE | PMU_TYPE_OS);
1700  if (retval) {
1701  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
1702  "Error initializing libpfm4",PAPI_MAX_STR_LEN);
1703  return retval;
1704  }
1705 
1706  return PAPI_OK;
1707 
1708 }
1709 
1710 /* Shutdown the perf_event component */
1711 int
1712 _pe_shutdown_component( void ) {
1713 
1714  /* deallocate our event table */
1715  _pe_libpfm4_shutdown(&_perf_event_vector, &perf_native_event_table);
1716 
1717  /* Shutdown libpfm4 */
1718  _papi_libpfm4_shutdown();
1719 
1720  return PAPI_OK;
1721 }
1722 
1723 
1724 
1725 
1726 int
1727 _pe_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
1728 {
1729  return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier,
1730  &perf_native_event_table);
1731 }
1732 
1733 int
1734 _pe_ntv_name_to_code( char *name, unsigned int *event_code) {
1735  return _pe_libpfm4_ntv_name_to_code(name,event_code,
1736  &perf_native_event_table);
1737 }
1738 
1739 int
1740 _pe_ntv_code_to_name(unsigned int EventCode,
1741  char *ntv_name, int len) {
1742  return _pe_libpfm4_ntv_code_to_name(EventCode,
1743  ntv_name, len,
1744  &perf_native_event_table);
1745 }
1746 
1747 int
1748 _pe_ntv_code_to_descr( unsigned int EventCode,
1749  char *ntv_descr, int len) {
1750 
1751  return _pe_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len,
1752  &perf_native_event_table);
1753 }
1754 
1755 int
1756 _pe_ntv_code_to_info(unsigned int EventCode,
1757  PAPI_event_info_t *info) {
1758 
1759  return _pe_libpfm4_ntv_code_to_info(EventCode, info,
1760  &perf_native_event_table);
1761 }
1762 
1763 /* These functions are based on builtin-record.c in the */
1764 /* kernel's tools/perf directory. */
1765 
1766 static uint64_t
1767 mmap_read_head( pe_event_info_t *pe )
1768 {
1769  struct perf_event_mmap_page *pc = pe->mmap_buf;
1770  uint64_t head;
1771 
1772  if ( pc == NULL ) {
1773  PAPIERROR( "perf_event_mmap_page is NULL" );
1774  return 0;
1775  }
1776 
1777  head = pc->data_head;
1778  rmb( );
1779 
1780  return head;
1781 }
1782 
1783 static void
1784 mmap_write_tail( pe_event_info_t *pe, uint64_t tail )
1785 {
1786  struct perf_event_mmap_page *pc = pe->mmap_buf;
1787 
1788  /* ensure all reads are done before we write the tail out. */
1789  pc->data_tail = tail;
1790 }
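/* These two helpers implement the consumer side of the perf_event */
/* mmap ring buffer: read data_head, issue a read barrier, consume */
/* records, then publish data_tail so the kernel can reuse the space. */
/* A sketch of one iteration (record_size comes from each record's */
/* perf_event_header.size, as in mmap_read() below): */
/* */
/*   uint64_t head = mmap_read_head( pe );   // data_head + rmb() */
/*   while ( pe->tail != head ) { */
/*       // process the record at data[pe->tail & pe->mask] */
/*       pe->tail += record_size; */
/*   } */
/*   mmap_write_tail( pe, pe->tail );        // store data_tail */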
1791 
1792 
1793 /* Does the kernel define these somewhere? */
1794 struct ip_event {
1795  struct perf_event_header header;
1796  uint64_t ip;
1797 };
1798 struct lost_event {
1799  struct perf_event_header header;
1800  uint64_t id;
1801  uint64_t lost;
1802 };
1803 typedef union event_union {
1804  struct perf_event_header header;
1805  struct ip_event ip;
1806  struct lost_event lost;
1807 } perf_sample_event_t;
1808 
1809 /* Should re-write with comments if we ever figure out what's */
1810 /* going on here. */
1811 static void
1812 mmap_read( int cidx, ThreadInfo_t **thr, pe_event_info_t *pe,
1813  int profile_index )
1814 {
1815  uint64_t head = mmap_read_head( pe );
1816  uint64_t old = pe->tail;
1817  unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( );
1818  int diff;
1819 
1820  diff = head - old;
1821  if ( diff < 0 ) {
1822  SUBDBG( "WARNING: failed to keep up with mmap data. head = %" PRIu64
1823  ", tail = %" PRIu64 ". Discarding samples.\n", head, old );
1824  /* head points to a known good entry, start there. */
1825  old = head;
1826  }
1827 
1828  for( ; old != head; ) {
 1829  perf_sample_event_t *event = ( perf_sample_event_t * )
 1830  & data[old & pe->mask];
1831  perf_sample_event_t event_copy;
1832  size_t size = event->header.size;
1833 
1834  /* Event straddles the mmap boundary -- header should always */
1835  /* be inside due to u64 alignment of output. */
1836  if ( ( old & pe->mask ) + size != ( ( old + size ) & pe->mask ) ) {
1837  uint64_t offset = old;
1838  uint64_t len = min( sizeof ( *event ), size ), cpy;
1839  void *dst = &event_copy;
1840 
1841  do {
1842  cpy = min( pe->mask + 1 - ( offset & pe->mask ), len );
1843  memcpy( dst, &data[offset & pe->mask], cpy );
1844  offset += cpy;
1845  dst = ((unsigned char*)dst) + cpy;
1846  len -= cpy;
1847  } while ( len );
1848 
1849  event = &event_copy;
1850  }
1851  old += size;
1852 
1853  SUBDBG( "event->type = %08x\n", event->header.type );
1854  SUBDBG( "event->size = %d\n", event->header.size );
1855 
1856  switch ( event->header.type ) {
1857  case PERF_RECORD_SAMPLE:
1858  _papi_hwi_dispatch_profile( ( *thr )->running_eventset[cidx],
1859  ( caddr_t ) ( unsigned long ) event->ip.ip,
1860  0, profile_index );
1861  break;
1862 
1863  case PERF_RECORD_LOST:
1864  SUBDBG( "Warning: because of a mmap buffer overrun, %" PRId64
1865  " events were lost.\n"
1866  "Loss was recorded when counter id %#"PRIx64
1867  " overflowed.\n", event->lost.lost, event->lost.id );
1868  break;
1869 
1870  default:
1871  SUBDBG( "Error: unexpected header type - %d\n",
1872  event->header.type );
1873  break;
1874  }
1875  }
1876 
1877  pe->tail = old;
1878  mmap_write_tail( pe, old );
1879 }
1880 
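/*
 * Worked example of the wrap handling in mmap_read() above, assuming
 * two 4096-byte data pages (pe->mask == 8191): with old == 8184 and a
 * 16-byte record, (old & mask) + size == 8200 while
 * (old + size) & mask == 8, so the record straddles the wrap point.
 * The copy loop then moves min(8192 - 8184, len) == 8 bytes from the
 * end of the buffer and the remaining 8 bytes from the start of the
 * buffer into event_copy before the record is parsed.
 */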
1881 /* Find a native event specified by a profile index */
1882 static int
1883 find_profile_index( EventSetInfo_t *ESI, int evt_idx, int *flags,
1884  unsigned int *native_index, int *profile_index )
1885 {
1886  int pos, esi_index, count;
1887 
1888  for ( count = 0; count < ESI->profile.event_counter; count++ ) {
1889  esi_index = ESI->profile.EventIndex[count];
1890  pos = ESI->EventInfoArray[esi_index].pos[0];
1891 
1892  if ( pos == evt_idx ) {
1893  *profile_index = count;
1894  *native_index = ESI->NativeInfoArray[pos].ni_event &
 1895  PAPI_NATIVE_AND_MASK;
 1896  *flags = ESI->profile.flags;
1897  SUBDBG( "Native event %d is at profile index %d, flags %d\n",
1898  *native_index, *profile_index, *flags );
1899  return PAPI_OK;
1900  }
1901  }
1902  PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d", count,
1903  ESI->profile.event_counter );
1904  return PAPI_EBUG;
1905 }
1906 
1907 
1908 
 1909 /* Process any samples accumulated in this event's mmap buffer */
1910 static int
1911 process_smpl_buf( int evt_idx, ThreadInfo_t **thr, int cidx )
1912 {
1913  int ret, flags, profile_index;
1914  unsigned native_index;
1915  pe_control_t *ctl;
1916 
1917  ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx,
1918  &flags, &native_index, &profile_index );
1919  if ( ret != PAPI_OK ) {
1920  return ret;
1921  }
1922 
1923  ctl= (*thr)->running_eventset[cidx]->ctl_state;
1924 
1925  mmap_read( cidx, thr,
1926  &(ctl->events[evt_idx]),
1927  profile_index );
1928 
1929  return PAPI_OK;
1930 }
1931 
1932 /*
1933  * This function is used when hardware overflows are working or when
1934  * software overflows are forced
1935  */
1936 
1937 void
1938 _pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc)
1939 {
1940  ( void ) n; /*unused */
1941  _papi_hwi_context_t hw_context;
1942  int found_evt_idx = -1, fd = info->si_fd;
1943  caddr_t address;
 1944  ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 );
 1945  int i;
1946  pe_control_t *ctl;
1947  int cidx = _perf_event_vector.cmp_info.CmpIdx;
1948 
1949  if ( thread == NULL ) {
1950  PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd );
1951  return;
1952  }
1953 
1954  if ( thread->running_eventset[cidx] == NULL ) {
1955  PAPIERROR( "thread->running_eventset == NULL in "
1956  "_papi_pe_dispatch_timer for fd %d!",fd );
1957  return;
1958  }
1959 
1960  if ( thread->running_eventset[cidx]->overflow.flags == 0 ) {
1961  PAPIERROR( "thread->running_eventset->overflow.flags == 0 in "
1962  "_papi_pe_dispatch_timer for fd %d!", fd );
1963  return;
1964  }
1965 
1966  hw_context.si = info;
1967  hw_context.ucontext = ( hwd_ucontext_t * ) uc;
1968 
1969  if ( thread->running_eventset[cidx]->overflow.flags &
 1970  PAPI_OVERFLOW_FORCE_SW ) {
 1971  address = GET_OVERFLOW_ADDRESS( hw_context );
1972  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
1973  address, NULL, 0,
1974  0, &thread, cidx );
1975  return;
1976  }
1977 
1978  if ( thread->running_eventset[cidx]->overflow.flags !=
 1979  PAPI_OVERFLOW_HARDWARE ) {
 1980  PAPIERROR( "thread->running_eventset->overflow.flags is set to "
1981  "something other than PAPI_OVERFLOW_HARDWARE or "
1982  "PAPI_OVERFLOW_FORCE_SW for fd %d (%#x)",
1983  fd , thread->running_eventset[cidx]->overflow.flags);
1984  }
1985 
1986  /* convoluted way to get ctl */
1987  ctl= thread->running_eventset[cidx]->ctl_state;
1988 
 1989  /* See if the fd is one that's part of this thread's context */
1990  for( i=0; i < ctl->num_events; i++ ) {
1991  if ( fd == ctl->events[i].event_fd ) {
1992  found_evt_idx = i;
1993  break;
1994  }
1995  }
1996 
1997  if ( found_evt_idx == -1 ) {
1998  PAPIERROR( "Unable to find fd %d among the open event fds "
 1999  "in _papi_hwi_dispatch_timer!", fd );
2000  return;
2001  }
2002 
2003  if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) {
2004  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed");
2005  }
2006 
2007  if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) &&
2008  !( thread->running_eventset[cidx]->profile.flags &
2009  PAPI_PROFIL_FORCE_SW ) ) {
2010  process_smpl_buf( found_evt_idx, &thread, cidx );
2011  }
2012  else {
2013  uint64_t ip;
 2014  uint64_t head;
2015  pe_event_info_t *pe = &(ctl->events[found_evt_idx]);
2016  unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( );
2017 
2018  /*
2019  * Read up the most recent IP from the sample in the mmap buffer. To
2020  * do this, we make the assumption that all of the records in the
2021  * mmap buffer are the same size, and that they all contain the IP as
2022  * their only record element. This means that we can use the
2023  * data_head element from the user page and move backward one record
2024  * from that point and read the data. Since we don't actually need
2025  * to access the header of the record, we can just subtract 8 (size
2026  * of the IP) from data_head and read up that word from the mmap
2027  * buffer. After we subtract 8, we account for mmap buffer wrapping
2028  * by AND'ing this offset with the buffer mask.
2029  */
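 /*
  * Worked example, assuming two 4096-byte data pages (pe->mask ==
  * 8191) and 16-byte header+IP records: with head == 4112 the most
  * recent IP is the uint64_t at data[ (4112 - 8) & 8191 ] ==
  * data[4104].
  */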
2030  head = mmap_read_head( pe );
2031 
2032  if ( head == 0 ) {
 2033  PAPIERROR( "Attempting to access memory which may be inaccessible" );
2034  return;
2035  }
2036  ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) );
2037  /*
2038  * Update the tail to the current head pointer.
2039  *
 2040  * Note that if we were to read the record at the tail pointer,
2041  * rather than the one at the head (as you might otherwise think
2042  * would be natural), we could run into problems. Signals don't
2043  * stack well on Linux, particularly if not using RT signals, and if
 2044  * they come in rapidly enough, we can lose some. Over time, the head
2045  * could catch up to the tail and monitoring would be stopped, and
2046  * since no more signals are coming in, this problem will never be
2047  * resolved, resulting in a complete loss of overflow notification
2048  * from that point on. So the solution we use here will result in
2049  * only the most recent IP value being read every time there are two
2050  * or more samples in the buffer (for that one overflow signal). But
2051  * the handler will always bring up the tail, so the head should
2052  * never run into the tail.
2053  */
2054  mmap_write_tail( pe, head );
2055 
2056  /*
2057  * The fourth parameter is supposed to be a vector of bits indicating
2058  * the overflowed hardware counters, but it's not really clear that
2059  * it's useful, because the actual hardware counters used are not
2060  * exposed to the PAPI user. For now, I'm just going to set the bit
2061  * that indicates which event register in the array overflowed. The
2062  * result is that the overflow vector will not be identical to the
2063  * perfmon implementation, and part of that is due to the fact that
2064  * which hardware register is actually being used is opaque at the
2065  * user level (the kernel event dispatcher hides that info).
2066  */
2067 
2068  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
2069  ( caddr_t ) ( unsigned long ) ip,
2070  NULL, ( 1 << found_evt_idx ), 0,
2071  &thread, cidx );
2072 
2073  }
2074 
2075  /* Restart the counters */
2076  if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) {
 2077  PAPIERROR( "overflow refresh failed" );
2078  }
2079 }
2080 
2081 /* Stop profiling */
2082 int
 2083 _pe_stop_profiling( ThreadInfo_t *thread, EventSetInfo_t *ESI )
 2084 {
2085  int i, ret = PAPI_OK;
2086  pe_control_t *ctl;
2087  int cidx;
2088 
2089  ctl=ESI->ctl_state;
2090 
2091  cidx=ctl->cidx;
2092 
2093  /* Loop through all of the events and process those which have mmap */
2094  /* buffers attached. */
2095  for ( i = 0; i < ctl->num_events; i++ ) {
2096  /* Use the mmap_buf field as an indicator of this fd being used for */
2097  /* profiling. */
2098  if ( ctl->events[i].mmap_buf ) {
2099  /* Process any remaining samples in the sample buffer */
2100  ret = process_smpl_buf( i, &thread, cidx );
2101  if ( ret ) {
2102  PAPIERROR( "process_smpl_buf returned error %d", ret );
2103  return ret;
2104  }
2105  }
2106  }
2107  return ret;
2108 }
2109 
2110 /* Setup an event to cause overflow */
2111 int
2112 _pe_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
2113 {
2114  SUBDBG("ENTER: ESI: %p, EventIndex: %d, threshold: %d\n", ESI, EventIndex, threshold);
2115 
2116  pe_context_t *ctx;
2117  pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state );
2118  int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK;
2119  int cidx;
2120 
2121  cidx = ctl->cidx;
2122  ctx = ( pe_context_t *) ( ESI->master->context[cidx] );
2123 
2124  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2125 
2126  SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n",
2127  evt_idx,EventIndex,ESI->EventSetIndex);
2128 
2129  if (evt_idx<0) {
2130  SUBDBG("EXIT: evt_idx: %d\n", evt_idx);
2131  return PAPI_EINVAL;
2132  }
2133 
2134  if ( threshold == 0 ) {
2135  /* If this counter isn't set to overflow, it's an error */
2136  if ( ctl->events[evt_idx].attr.sample_period == 0 ) {
2137  SUBDBG("EXIT: PAPI_EINVAL, Tried to clear sample threshold when it was not set\n");
2138  return PAPI_EINVAL;
2139  }
2140  }
2141 
2142  ctl->events[evt_idx].attr.sample_period = threshold;
2143 
2144  /*
2145  * Note that the wakeup_mode field initially will be set to zero
 2146  * (WAKEUP_MODE_COUNTER_OVERFLOW) because all of the events in the
 2147  * ctl struct are zeroed with memset() when they are allocated.
2148  *
2149  * Is it even set to any other value elsewhere?
2150  */
2151  switch ( ctl->events[evt_idx].wakeup_mode ) {
2152  case WAKEUP_MODE_PROFILING:
2153  /* Setting wakeup_events to special value zero means issue a */
2154  /* wakeup (signal) on every mmap page overflow. */
2155  ctl->events[evt_idx].attr.wakeup_events = 0;
2156  break;
2157 
 2158  case WAKEUP_MODE_COUNTER_OVERFLOW:
 2159  /* Can this code ever be called? */
2160 
2161  /* Setting wakeup_events to one means issue a wakeup on every */
2162  /* counter overflow (not mmap page overflow). */
2163  ctl->events[evt_idx].attr.wakeup_events = 1;
2164  /* We need the IP to pass to the overflow handler */
2165  ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP;
2166  /* one for the user page, and two to take IP samples */
2167  ctl->events[evt_idx].nr_mmap_pages = 1 + 2;
2168  break;
2169  default:
2170  PAPIERROR( "ctl->wakeup_mode[%d] set to an unknown value - %u",
2171  evt_idx, ctl->events[evt_idx].wakeup_mode);
2172  SUBDBG("EXIT: PAPI_EBUG\n");
2173  return PAPI_EBUG;
2174  }
2175 
2176  /* Check for non-zero sample period */
2177  for ( i = 0; i < ctl->num_events; i++ ) {
 2178  if ( ctl->events[i].attr.sample_period ) {
2179  found_non_zero_sample_period = 1;
2180  break;
2181  }
2182  }
2183 
2184  if ( found_non_zero_sample_period ) {
2185  /* turn on internal overflow flag for this event set */
2186  ctl->overflow = 1;
2187 
2188  /* Enable the signal handler */
 2189  retval = _papi_hwi_start_signal(
 2190  ctl->overflow_signal,
2191  1, ctl->cidx );
2192  if (retval != PAPI_OK) {
2193  SUBDBG("Call to _papi_hwi_start_signal returned: %d\n", retval);
2194  }
2195  } else {
2196  /* turn off internal overflow flag for this event set */
2197  ctl->overflow = 0;
2198 
2199  /* Remove the signal handler, if there are no remaining non-zero */
2200  /* sample_periods set */
 2201  retval = _papi_hwi_stop_signal( ctl->overflow_signal );
 2202  if ( retval != PAPI_OK ) {
2203  SUBDBG("Call to _papi_hwi_stop_signal returned: %d\n", retval);
2204  return retval;
2205  }
2206  }
2207 
2208  retval = _pe_update_control_state( ctl, NULL,
2209  ( (pe_control_t *) (ESI->ctl_state) )->num_events,
2210  ctx );
2211 
2212  SUBDBG("EXIT: return: %d\n", retval);
2213  return retval;
2214 }
2215 
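/*
 * Usage sketch: from the user's side of the PAPI API, a call sequence
 * like the one below (handler name and threshold are illustrative) is
 * what ultimately reaches _pe_set_overflow() above, programming
 * attr.sample_period and arming the signal that _pe_dispatch_timer()
 * services.
 */
#if 0
static void
example_handler( int EventSet, void *address, long long overflow_vector,
		 void *context )
{
	/* invoked from _pe_dispatch_timer() via the PAPI framework */
	(void)EventSet; (void)address; (void)overflow_vector; (void)context;
}

static int
example_arm_overflow( int EventSet )
{
	/* assumes PAPI_TOT_CYC was already added to EventSet */
	int ret = PAPI_overflow( EventSet, PAPI_TOT_CYC, 1000000, 0,
				 example_handler );
	if ( ret != PAPI_OK ) return ret;
	return PAPI_start( EventSet );
}
#endif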
2216 /* Enable profiling */
2217 int
2218 _pe_set_profile( EventSetInfo_t *ESI, int EventIndex, int threshold )
2219 {
2220  int ret;
2221  int evt_idx;
2222  pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state );
2223 
2224  /* Since you can't profile on a derived event, the event is always the */
2225  /* first and only event in the native event list. */
2226  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2227 
2228  if ( threshold == 0 ) {
2229  SUBDBG( "MUNMAP(%p,%"PRIu64")\n", ctl->events[evt_idx].mmap_buf,
2230  ( uint64_t ) ctl->events[evt_idx].nr_mmap_pages *
2231  getpagesize( ) );
2232 
2233  if ( ctl->events[evt_idx].mmap_buf ) {
2234  munmap( ctl->events[evt_idx].mmap_buf,
2235  ctl->events[evt_idx].nr_mmap_pages * getpagesize() );
2236  }
2237  ctl->events[evt_idx].mmap_buf = NULL;
2238  ctl->events[evt_idx].nr_mmap_pages = 0;
2239  ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP;
2240  ret = _pe_set_overflow( ESI, EventIndex, threshold );
2241  /* ??? #warning "This should be handled somewhere else" */
2242  ESI->state &= ~( PAPI_OVERFLOWING );
2243  ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );
2244 
2245  return ret;
2246  }
2247 
2248  /* Look up the native event code */
 2249  if ( ESI->profile.flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) {
 2250  /* Not supported yet... */
2251 
2252  return PAPI_ENOSUPP;
2253  }
2254  if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
2255  /* This requires an ability to randomly alter the sample_period within */
2256  /* a given range. Kernel does not have this ability. FIXME */
2257  return PAPI_ENOSUPP;
2258  }
2259 
2260  /* Just a guess at how many pages would make this relatively efficient. */
2261  /* Note that it's "1 +" because of the need for a control page, and the */
 2262  /* number following the "+" must be a power of 2 (1, 2, 4, 8, 16, etc.) or */
2263  /* zero. This is required to optimize dealing with circular buffer */
2264  /* wrapping of the mapped pages. */
2265 
2266  ctl->events[evt_idx].nr_mmap_pages = (1+8);
2267  ctl->events[evt_idx].attr.sample_type |= PERF_SAMPLE_IP;
2268 
2269  ret = _pe_set_overflow( ESI, EventIndex, threshold );
2270  if ( ret != PAPI_OK ) return ret;
2271 
2272  return PAPI_OK;
2273 }
2274 
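/*
 * Usage sketch: PAPI_profil() is the user-level entry point that
 * reaches _pe_set_profile() above; each overflow then deposits an IP
 * sample via process_smpl_buf()/_papi_hwi_dispatch_profile(). Buffer
 * size, scale, and threshold below are illustrative.
 */
#if 0
static unsigned short example_profbuf[8192];

static int
example_start_profiling( int EventSet, caddr_t text_start )
{
	int ret;

	memset( example_profbuf, 0, sizeof(example_profbuf) );
	/* assumes PAPI_TOT_CYC was already added to EventSet */
	ret = PAPI_profil( example_profbuf, sizeof(example_profbuf),
			   text_start, 65536, EventSet, PAPI_TOT_CYC,
			   1000000, PAPI_PROFIL_POSIX );
	if ( ret != PAPI_OK ) return ret;
	return PAPI_start( EventSet );
}
#endif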
2275 
2276 /* Our component vector */
2277 
2278 papi_vector_t _perf_event_vector = {
2279  .cmp_info = {
2280  /* component information (unspecified values initialized to 0) */
2281  .name = "perf_event",
2282  .short_name = "perf",
2283  .version = "5.0",
2284  .description = "Linux perf_event CPU counters",
2285 
2286  .default_domain = PAPI_DOM_USER,
2287  .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
2288  .default_granularity = PAPI_GRN_THR,
2289  .available_granularities = PAPI_GRN_THR | PAPI_GRN_SYS,
2290 
2291  .hardware_intr = 1,
2292  .kernel_profile = 1,
2293 
2294  /* component specific cmp_info initializations */
2295  .fast_virtual_timer = 0,
2296  .attach = 1,
2297  .attach_must_ptrace = 1,
2298  .cpu = 1,
2299  .inherit = 1,
2300  .cntr_umasks = 1,
2301 
2302  },
2303 
2304  /* sizes of framework-opaque component-private structures */
2305  .size = {
2306  .context = sizeof ( pe_context_t ),
2307  .control_state = sizeof ( pe_control_t ),
2308  .reg_value = sizeof ( int ),
2309  .reg_alloc = sizeof ( int ),
2310  },
2311 
2312  /* function pointers in this component */
2313  .init_component = _pe_init_component,
2314  .shutdown_component = _pe_shutdown_component,
2315  .init_thread = _pe_init_thread,
2316  .init_control_state = _pe_init_control_state,
2317  .dispatch_timer = _pe_dispatch_timer,
2318 
2319  /* function pointers from the shared perf_event lib */
2320  .start = _pe_start,
2321  .stop = _pe_stop,
2322  .read = _pe_read,
2323  .shutdown_thread = _pe_shutdown_thread,
2324  .ctl = _pe_ctl,
2325  .update_control_state = _pe_update_control_state,
2326  .set_domain = _pe_set_domain,
2327  .reset = _pe_reset,
2328  .set_overflow = _pe_set_overflow,
2329  .set_profile = _pe_set_profile,
2330  .stop_profiling = _pe_stop_profiling,
2331  .write = _pe_write,
2332 
2333 
2334  /* from counter name mapper */
2335  .ntv_enum_events = _pe_ntv_enum_events,
2336  .ntv_name_to_code = _pe_ntv_name_to_code,
2337  .ntv_code_to_name = _pe_ntv_code_to_name,
2338  .ntv_code_to_descr = _pe_ntv_code_to_descr,
2339  .ntv_code_to_info = _pe_ntv_code_to_info,
2340 };