PAPI  5.3.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
perf_event_uncore.c
Go to the documentation of this file.
1 /*
2 * File: perf_event_uncore.c
3 *
4 * Author: Vince Weaver
5 * vincent.weaver@maine.edu
6 */
7 
8 #include <fcntl.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <signal.h>
12 #include <syscall.h>
13 #include <sys/utsname.h>
14 #include <sys/mman.h>
15 #include <sys/ioctl.h>
16 
17 /* PAPI-specific includes */
18 #include "papi.h"
19 #include "papi_memory.h"
20 #include "papi_internal.h"
21 #include "papi_vector.h"
22 #include "extras.h"
23 
24 /* libpfm4 includes */
25 #include "papi_libpfm4_events.h"
26 #include "peu_libpfm4_events.h"
27 #include "perfmon/pfmlib.h"
28 #include PEINCLUDE
29 
30 /* Linux-specific includes */
31 #include "mb.h"
32 #include "linux-memory.h"
33 #include "linux-timer.h"
34 #include "linux-common.h"
35 #include "linux-context.h"
36 
38 
/* Forward declaration */
/* NOTE(review): the declared entity (original lines 40-41) was lost in the */
/* extraction -- presumably the component vector, which is referenced below */
/* before its definition at the bottom of the file; confirm.                */

/* Globals */
/* NOTE(review): original line 43 (another file-scope object, likely the */
/* uncore native event table) was also lost in extraction; confirm.      */
/* Index of this component in the PAPI framework, saved at init time. */
static int our_cidx;

/* Defines for ctx->state */
#define PERF_EVENTS_OPENED 0x01  /* fds for all events have been opened */
#define PERF_EVENTS_RUNNING 0x02 /* counters are currently enabled */
49 
50 
/* The read format on perf_event varies based on various flags that */
/* are passed into it. This helper avoids copying this logic */
/* multiple places. */
static unsigned int
get_read_format( unsigned int multiplex,
		 unsigned int inherit,
		 int format_group )
{
	unsigned int fmt = 0;

	/* Multiplexed eventsets need the timing fields so counts */
	/* can be scaled by enabled/running time later.            */
	if ( multiplex ) {
		fmt |= PERF_FORMAT_TOTAL_TIME_ENABLED |
		       PERF_FORMAT_TOTAL_TIME_RUNNING;
	}

	/* Group reads are only requested when inherit is off; the */
	/* two options are mutually exclusive in the kernel.       */
	if ( !inherit && format_group ) {
		fmt |= PERF_FORMAT_GROUP;
	}

	SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n",
	       multiplex, inherit, format_group, fmt);

	return fmt;
}
79 
80 /********************************************************************/
81 /* Low-level perf_event calls */
82 /********************************************************************/
83 
/* In case headers aren't new enough to have __NR_perf_event_open */
#ifndef __NR_perf_event_open

#ifdef __powerpc__
#define __NR_perf_event_open 319
#elif defined(__x86_64__)
#define __NR_perf_event_open 298
#elif defined(__i386__)
#define __NR_perf_event_open 336
#elif defined(__arm__)
/* ARM: the syscall number is offset by what appears to be the OABI   */
/* syscall base (0x900000) -- TODO confirm against <asm/unistd.h>.    */
/* The value was previously misplaced onto the #elif line, leaving    */
/* the macro defined empty; it is also parenthesized now so it is     */
/* safe inside larger expressions.                                    */
#define __NR_perf_event_open (366+0x900000)
#endif

#endif
98 
99 static long
100 sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu,
101  int group_fd, unsigned long flags )
102 {
103  int ret;
104 
105  SUBDBG("sys_perf_event_open(%p,%d,%d,%d,%lx\n",hw_event,pid,cpu,group_fd,flags);
106  SUBDBG(" type: %d\n",hw_event->type);
107  SUBDBG(" size: %d\n",hw_event->size);
108  SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",hw_event->config,
109  hw_event->config);
110  SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period);
111  SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type);
112  SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format);
113  SUBDBG(" disabled: %d\n",hw_event->disabled);
114  SUBDBG(" inherit: %d\n",hw_event->inherit);
115  SUBDBG(" pinned: %d\n",hw_event->pinned);
116  SUBDBG(" exclusive: %d\n",hw_event->exclusive);
117  SUBDBG(" exclude_user: %d\n",hw_event->exclude_user);
118  SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel);
119  SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv);
120  SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle);
121  SUBDBG(" mmap: %d\n",hw_event->mmap);
122  SUBDBG(" comm: %d\n",hw_event->comm);
123  SUBDBG(" freq: %d\n",hw_event->freq);
124  SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat);
125  SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec);
126  SUBDBG(" task: %d\n",hw_event->task);
127  SUBDBG(" watermark: %d\n",hw_event->watermark);
128 
129  ret =
130  syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
131  SUBDBG("Returned %d %d %s\n",ret,
132  ret<0?errno:0,
133  ret<0?strerror(errno):" ");
134  return ret;
135 }
136 
137 
138 static int map_perf_event_errors_to_papi(int perf_event_error) {
139 
140  int ret;
141 
142  /* These mappings are approximate.
143  EINVAL in particular can mean lots of different things */
144  switch(perf_event_error) {
145  case EPERM:
146  case EACCES:
147  ret = PAPI_EPERM;
148  break;
149  case ENODEV:
150  case EOPNOTSUPP:
151  ret = PAPI_ENOSUPP;
152  break;
153  case ENOENT:
154  ret = PAPI_ENOEVNT;
155  break;
156  case ENOSYS:
157  case EAGAIN:
158  case EBUSY:
159  case E2BIG:
160  ret = PAPI_ESYS;
161  break;
162  case ENOMEM:
163  ret = PAPI_ENOMEM;
164  break;
165  case EINVAL:
166  default:
167  ret = PAPI_EINVAL;
168  break;
169  }
170  return ret;
171 }
172 
173 /* Maximum size we ever expect to read from a perf_event fd */
174 /* (this is the number of 64-bit values) */
175 /* We use this to size the read buffers */
176 /* The three is for event count, time_enabled, time_running */
177 /* and the counter term is count value and count id for each */
178 /* possible counter value. */
179 #define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))
180 
/* Open all events in the control state */
/* Returns PAPI_OK on success; on failure, closes any fds already     */
/* opened and returns an error code.                                  */
/* NOTE(review): the signature line (original 183) was lost in        */
/* extraction; the file's cross-reference index gives it as           */
/* open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) -- confirm. */
static int
{

	int i, ret = PAPI_OK;
	long pid;

	/* System-wide granularity counts on any pid (-1); otherwise */
	/* restrict counting to the requested thread.                */
	if (ctl->granularity==PAPI_GRN_SYS) {
		pid = -1;
	}
	else {
		pid = ctl->tid;
	}

	for( i = 0; i < ctl->num_events; i++ ) {

		ctl->events[i].event_opened=0;

		/* set up the attr structure.  We don't set up all fields here */
		/* as some have already been set up previously. */

		/* group leader (event 0) is special */
		/* If we're multiplexed, everyone is a group leader */
		if (( i == 0 ) || (ctl->multiplexed)) {
			ctl->events[i].attr.pinned = !ctl->multiplexed;
			ctl->events[i].attr.disabled = 1;
			ctl->events[i].group_leader_fd=-1;
			ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed,
							ctl->inherit,
							!ctl->multiplexed );
		} else {
			ctl->events[i].attr.pinned=0;
			ctl->events[i].attr.disabled = 0;
			/* NOTE(review): comma operator below (not a semicolon); */
			/* harmless here but easy to misread.                    */
			ctl->events[i].group_leader_fd=ctl->events[0].event_fd,
			ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed,
							ctl->inherit,
							0 );
		}


		/* try to open */
		ctl->events[i].event_fd = sys_perf_event_open( &ctl->events[i].attr,
						pid,
						ctl->cpu,
						ctl->events[i].group_leader_fd,
						0 /* flags */
						);

		/* Try to match Linux errors to PAPI errors */
		if ( ctl->events[i].event_fd == -1 ) {
			SUBDBG("sys_perf_event_open returned error on event #%d."
				"  Error: %s\n",
				i, strerror( errno ) );
			/* NOTE(review): a statement here (original line 235,      */
			/* presumably mapping errno into ret via                   */
			/* map_perf_event_errors_to_papi) was lost in extraction.  */

			goto open_pe_cleanup;
		}

		SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
			" group_leader/fd: %d, event_fd: %d,"
			" read_format: 0x%"PRIu64"\n",
			pid, ctl->cpu, ctl->events[i].group_leader_fd,
			ctl->events[i].event_fd, ctl->events[i].attr.read_format);

		ctl->events[i].event_opened=1;
	}

	/* Now that we've successfully opened all of the events, do whatever */
	/* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
	/* and so on. */
	for ( i = 0; i < ctl->num_events; i++ ) {

		/* No sampling if uncore */
		ctl->events[i].mmap_buf = NULL;
	}

	/* Set num_evts only if completely successful */
	ctx->state |= PERF_EVENTS_OPENED;

	return PAPI_OK;

open_pe_cleanup:
	/* We encountered an error, close up the fds we successfully opened. */
	/* We go backward in an attempt to close group leaders last, although */
	/* That's probably not strictly necessary. */
	while ( i > 0 ) {
		i--;
		if (ctl->events[i].event_fd>=0) {
			close( ctl->events[i].event_fd );
			ctl->events[i].event_opened=0;
		}
	}

	return ret;
}
277 
/* Close all of the opened events */
/* Children (group members) are closed first, then group leaders; any  */
/* mmap'd sample buffer is unmapped before its fd is closed.           */
/* NOTE(review): the signature line (original 280) was lost in         */
/* extraction; the file's cross-reference index gives it as            */
/* close_pe_events( pe_context_t *ctx, pe_control_t *ctl ) -- confirm. */
static int
{
	int i;
	int num_closed=0;
	int events_not_opened=0;

	/* should this be a more serious error? */
	if ( ctx->state & PERF_EVENTS_RUNNING ) {
		SUBDBG("Closing without stopping first\n");
	}

	/* Close child events first */
	for( i=0; i<ctl->num_events; i++ ) {

		if (ctl->events[i].event_opened) {

			/* children have a real group_leader_fd; leaders use -1 */
			if (ctl->events[i].group_leader_fd!=-1) {
				if ( ctl->events[i].mmap_buf ) {
					if ( munmap ( ctl->events[i].mmap_buf,
						ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
						PAPIERROR( "munmap of fd = %d returned error: %s",
							ctl->events[i].event_fd, strerror( errno ) );
						return PAPI_ESYS;
					}
				}

				if ( close( ctl->events[i].event_fd ) ) {
					PAPIERROR( "close of fd = %d returned error: %s",
						ctl->events[i].event_fd, strerror( errno ) );
					return PAPI_ESYS;
				} else {
					num_closed++;
				}
				ctl->events[i].event_opened=0;
			}
		}
		else {
			events_not_opened++;
		}
	}

	/* Close the group leaders last */
	for( i=0; i<ctl->num_events; i++ ) {

		if (ctl->events[i].event_opened) {

			if (ctl->events[i].group_leader_fd==-1) {
				if ( ctl->events[i].mmap_buf ) {
					if ( munmap ( ctl->events[i].mmap_buf,
						ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
						PAPIERROR( "munmap of fd = %d returned error: %s",
							ctl->events[i].event_fd, strerror( errno ) );
						return PAPI_ESYS;
					}
				}


				if ( close( ctl->events[i].event_fd ) ) {
					PAPIERROR( "close of fd = %d returned error: %s",
						ctl->events[i].event_fd, strerror( errno ) );
					return PAPI_ESYS;
				} else {
					num_closed++;
				}
				ctl->events[i].event_opened=0;
			}
		}
	}


	/* Sanity check: every event should be either closed or never opened */
	if (ctl->num_events!=num_closed) {
		if (ctl->num_events!=(num_closed+events_not_opened)) {
			PAPIERROR("Didn't close all events: "
				"Closed %d Not Opened: %d Expected %d\n",
				num_closed,events_not_opened,ctl->num_events);
			return PAPI_EBUG;
		}
	}

	ctl->num_events=0;

	ctx->state &= ~PERF_EVENTS_OPENED;

	return PAPI_OK;
}
365 
366 
367 
368 
369 /********************************************************************/
370 /* Component Interface */
371 /********************************************************************/
372 
373 
374 
/* Initialize a thread */
/* Zeroes the per-thread context and records the component index.   */
/* NOTE(review): the signature line (original 377) was lost in      */
/* extraction; presumably _peu_init_thread( hwd_context_t *hwd_ctx )*/
/* -- confirm.                                                      */
int
{

	pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;

	/* clear the context structure and mark as initialized */
	memset( pe_ctx, 0, sizeof ( pe_context_t ) );
	pe_ctx->initialized=1;

	/* remember which PAPI component index we belong to */
	pe_ctx->cidx=our_cidx;

	return PAPI_OK;
}
391 
/* Initialize a new control state */
/* NOTE(review): the signature line (original 394) was lost in extraction; */
/* presumably _peu_init_control_state( hwd_control_state_t *ctl )          */
/* -- confirm.                                                             */
int
{
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* clear the contents */
	memset( pe_ctl, 0, sizeof ( pe_control_t ) );

	/* Set the default domain */
	_pe_set_domain( ctl, _perf_event_uncore_vector.cmp_info.default_domain );

	/* Set the default granularity */
	pe_ctl->granularity=_perf_event_uncore_vector.cmp_info.default_granularity;

	/* remember which PAPI component index we belong to */
	pe_ctl->cidx=our_cidx;

	/* Set cpu number in the control block to show events */
	/* are not tied to specific cpu */
	pe_ctl->cpu = -1;
	return PAPI_OK;
}
414 
415 
416 
/* Initialize the perf_event uncore component */
/* Probes for kernel perf_event support, initializes libpfm4, and       */
/* verifies that uncore PMUs exist and are accessible; on any failure   */
/* records a disabled_reason and returns PAPI_ENOCMP.                   */
/* NOTE(review): the signature line (original 419) was lost in          */
/* extraction; presumably _peu_init_component( int cidx ) -- confirm.   */
int
{

	int retval;
	int paranoid_level;

	FILE *fff;

	/* remember our component index for later callbacks */
	our_cidx=cidx;

	/* The is the official way to detect if perf_event support exists */
	/* The file is called perf_counter_paranoid on 2.6.31 */
	/* currently we are lazy and do not support 2.6.31 kernels */

	fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
	if (fff==NULL) {
		/* NOTE(review): strncpy with PAPI_MAX_STR_LEN may leave the */
		/* reason string unterminated at exactly max length --       */
		/* confirm upstream convention.                              */
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"perf_event support not detected",PAPI_MAX_STR_LEN);
		return PAPI_ENOCMP;
	}
	retval=fscanf(fff,"%d",&paranoid_level);
	if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
	fclose(fff);


	/* Run the libpfm4-specific setup */

	retval = _papi_libpfm4_init(_papi_hwd[cidx]);
	if (retval) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"Error initializing libpfm4",PAPI_MAX_STR_LEN);
		return PAPI_ENOCMP;
	}


	/* Run the uncore specific libpfm4 setup */

	/* NOTE(review): the remaining call arguments (original lines       */
	/* 457-458, likely the uncore native event table and PMU type)      */
	/* were lost in extraction.                                         */
	retval = _peu_libpfm4_init(_papi_hwd[cidx],
	if (retval) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"Error setting up libpfm4",PAPI_MAX_STR_LEN);
		return PAPI_ENOCMP;
	}

	/* Check if no uncore events found */

	if (_papi_hwd[cidx]->cmp_info.num_native_events==0) {
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"No uncore PMUs or events found",PAPI_MAX_STR_LEN);
		return PAPI_ENOCMP;
	}

	/* Check if we have enough permissions for uncore */

	/* 2 means no kernel measurements allowed */
	/* 1 means normal counter access */
	/* 0 means you can access CPU-specific data */
	/* -1 means no restrictions */

	if ((paranoid_level>0) && (getuid()!=0)) {
		/* NOTE(review): the closing argument of this strncpy           */
		/* (original line 483, presumably PAPI_MAX_STR_LEN); was lost   */
		/* in extraction.                                               */
		strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
			"Insufficient permissions for uncore access. Set /proc/sys/kernel/perf_event_paranoid to 0 or run as root.",
		return PAPI_ENOCMP;
	}

	return PAPI_OK;

}
490 
/* Shutdown the perf_event component */
/* NOTE(review): the signature line (original 492) and the two cleanup  */
/* statements (original 495 and 498 -- per the comments below, freeing  */
/* the event table and shutting down libpfm4) were lost in extraction.  */

	/* deallocate our event table */

	/* Shutdown libpfm4 */

	return PAPI_OK;
}
502 
/* This function clears the current contents of the control structure and
   updates it with whatever resources are allocated for all the native events
   in the native info structure array. */
/* NOTE(review): the first two signature lines (original 508-509) were  */
/* lost in extraction; from the call sites below it is presumably       */
/* _peu_update_control_state( hwd_control_state_t *ctl,                 */
/*                            NativeInfo_t *native, ... ) -- confirm.   */

int
 int count, hwd_context_t *ctx )
{
	int i = 0, ret;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* close all of the existing fds and start over again */
	/* In theory we could have finer-grained control and know if */
	/* things were changed, but it's easier to tear things down and rebuild. */
	close_pe_events( pe_ctx, pe_ctl );

	/* Calling with count==0 should be OK, it's how things are deallocated */
	/* when an eventset is destroyed. */
	if ( count == 0 ) {
		SUBDBG( "Called with count == 0\n" );
		return PAPI_OK;
	}

	/* set up all the events */
	for( i = 0; i < count; i++ ) {
		if ( native ) {
			/* Have libpfm4 set the config values for the event */
			/* NOTE(review): the call itself (original line 532,      */
			/* presumably ret = _peu_libpfm4_setup_counters(...))     */
			/* was lost in extraction.                                */
				native[i].ni_event,
				pe_ctx->event_table);
			SUBDBG( "pe_ctl->eventss[%d].config=%"PRIx64"\n",i,
				pe_ctl->events[i].attr.config);
			if (ret!=PAPI_OK) return ret;

		} else {
			/* I'm not sure how we'd end up in this case */
			/* should it be an error? */
		}

		/* Copy the inherit flag into the attribute block that will be */
		/* passed to the kernel */
		pe_ctl->events[i].attr.inherit = pe_ctl->inherit;

		/* Set the position in the native structure */
		/* We just set up events linearly */
		if ( native ) {
			native[i].ni_position = i;
		}
	}

	pe_ctl->num_events = count;
	_pe_set_domain( ctl, pe_ctl->domain );

	/* actually open the events */
	/* (why is this a separate function?) */
	ret = open_pe_events( pe_ctx, pe_ctl );
	if ( ret != PAPI_OK ) {
		SUBDBG("open_pe_events failed\n");
		/* Restore values ? */
		return ret;
	}

	return PAPI_OK;
}
569 
570 /********************************************************************/
571 /********************************************************************/
572 /* Start with functions that are exported via the module interface */
573 /********************************************************************/
574 /********************************************************************/
575 
576 
/* set the domain. FIXME: perf_events allows per-event control of this. */
/* we do not handle that yet. */
/* NOTE(review): the signature line (original 580) was lost in          */
/* extraction; the file's cross-reference index gives it as             */
/* _peu_set_domain(hwd_control_state_t *ctl, int domain).               */
int
{

	int i;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	SUBDBG("old control domain %d, new domain %d\n",
		pe_ctl->domain,domain);

	pe_ctl->domain = domain;

	/* Force the domain on all events: a domain bit NOT set in the */
	/* mask becomes an exclude_* flag in the kernel attr.          */
	for( i = 0; i < pe_ctl->num_events; i++ ) {
		pe_ctl->events[i].attr.exclude_user =
			!( pe_ctl->domain & PAPI_DOM_USER );
		pe_ctl->events[i].attr.exclude_kernel =
			!( pe_ctl->domain & PAPI_DOM_KERNEL );
		pe_ctl->events[i].attr.exclude_hv =
			!( pe_ctl->domain & PAPI_DOM_SUPERVISOR );
	}
	return PAPI_OK;
}
602 
/* Shutdown a thread */
/* NOTE(review): the signature line (original 605) was lost in       */
/* extraction; presumably _peu_shutdown_thread( hwd_context_t *ctx ) */
/* -- confirm.                                                       */
int
{
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;

	/* mark the per-thread context as no longer usable */
	pe_ctx->initialized=0;

	return PAPI_OK;
}
613 
614 
/* reset the hardware counters */
/* Note: PAPI_reset() does not necessarily call this */
/* unless the events are actually running. */
/* NOTE(review): the signature line (original 619) was lost in      */
/* extraction; the file's cross-reference index gives it as         */
/* _peu_reset(hwd_context_t *ctx, hwd_control_state_t *ctl).        */
int
{
	int i, ret;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	( void ) ctx; /*unused */

	/* We need to reset all of the events, not just the group leaders */
	for( i = 0; i < pe_ctl->num_events; i++ ) {
		ret = ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
		if ( ret == -1 ) {
			PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
				"returned error, Linux says: %s",
				pe_ctl->events[i].event_fd, strerror( errno ) );
			return PAPI_ESYS;
		}
	}

	return PAPI_OK;
}
639 
640 
/* write (set) the hardware counters */
/* Currently we do not support this. */
/* NOTE(review): the first signature line (original 644) was lost in  */
/* extraction; the file's cross-reference index gives the full        */
/* signature as _peu_write(hwd_context_t *ctx, hwd_control_state_t    */
/* *ctl, long long *from).                                            */
int
	long long *from )
{
	( void ) ctx; /*unused */
	( void ) ctl; /*unused */
	( void ) from; /*unused */
	/*
	 * Counters cannot be written. Do we need to virtualize the
	 * counters so that they can be written, or perhaps modify code so that
	 * they can be written? FIXME ?
	 */

	return PAPI_ENOSUPP;
}
658 
/*
 * perf_event provides a complicated read interface.
 * the info returned by read() varies depending on whether
 * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
 * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set
 *
 * To simplify things we just always ask for everything.  This might
 * lead to overhead when reading more than we need, but it makes the
 * read code a lot simpler than the original implementation we had here.
 *
 * For more info on the layout see include/linux/perf_event.h
 *
 */

/* NOTE(review): the first signature line (original 674) was lost in   */
/* extraction; presumably _peu_read( hwd_context_t *ctx,               */
/* hwd_control_state_t *ctl, ... ) -- confirm.                         */
int
 long long **events, int flags )
{
	( void ) flags; /*unused */
	int i, ret = -1;
	/* pe_context_t *pe_ctx = ( pe_context_t *) ctx; */
	(void) ctx; /*unused*/
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;
	long long papi_pe_buffer[READ_BUFFER_SIZE];
	long long tot_time_running, tot_time_enabled, scale;

	/* Handle case where we are multiplexing */
	if (pe_ctl->multiplexed) {

		/* currently we handle multiplexing by having individual events */
		/* so we read from each in turn. */

		for ( i = 0; i < pe_ctl->num_events; i++ ) {

			ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
				sizeof ( papi_pe_buffer ) );
			if ( ret == -1 ) {
				PAPIERROR("read returned an error: ", strerror( errno ));
				return PAPI_ESYS;
			}

			/* We should read 3 64-bit values from the counter */
			/* (count, time_enabled, time_running)             */
			if (ret<(signed)(3*sizeof(long long))) {
				PAPIERROR("Error!  short read!\n");
				return PAPI_ESYS;
			}

			SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
				pe_ctl->events[i].event_fd,
				(long)pe_ctl->tid, pe_ctl->cpu, ret);
			SUBDBG("read: %lld %lld %lld\n",papi_pe_buffer[0],
				papi_pe_buffer[1],papi_pe_buffer[2]);

			tot_time_enabled = papi_pe_buffer[1];
			tot_time_running = papi_pe_buffer[2];

			SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
				"tot_time_enabled %lld) / tot_time_running %lld\n",
				i, 0,papi_pe_buffer[0],
				tot_time_enabled,tot_time_running);

			if (tot_time_running == tot_time_enabled) {
				/* No scaling needed */
				pe_ctl->counts[i] = papi_pe_buffer[0];
			} else if (tot_time_running && tot_time_enabled) {
				/* Scale factor of 100 to avoid overflows when computing */
				/*enabled/running */

				scale = (tot_time_enabled * 100LL) / tot_time_running;
				scale = scale * papi_pe_buffer[0];
				scale = scale / 100LL;
				pe_ctl->counts[i] = scale;
			} else {
				/* This should not happen, but Phil reports it sometime does. */
				SUBDBG("perf_event kernel bug(?) count, enabled, "
					"running: %lld, %lld, %lld\n",
					papi_pe_buffer[0],tot_time_enabled,
					tot_time_running);

				pe_ctl->counts[i] = papi_pe_buffer[0];
			}
		}
	}

	/* Handle cases where we cannot use FORMAT GROUP */
	else if (pe_ctl->inherit) {

		/* we must read each counter individually */
		for ( i = 0; i < pe_ctl->num_events; i++ ) {

			ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
				sizeof ( papi_pe_buffer ) );
			if ( ret == -1 ) {
				PAPIERROR("read returned an error: ", strerror( errno ));
				return PAPI_ESYS;
			}

			/* we should read one 64-bit value from each counter */
			if (ret!=sizeof(long long)) {
				PAPIERROR("Error!  short read!\n");
				PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
					pe_ctl->events[i].event_fd,
					(long)pe_ctl->tid, pe_ctl->cpu, ret);
				return PAPI_ESYS;
			}

			SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
				pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
				pe_ctl->cpu, ret);
			SUBDBG("read: %lld\n",papi_pe_buffer[0]);

			pe_ctl->counts[i] = papi_pe_buffer[0];
		}
	}


	/* Handle cases where we are using FORMAT_GROUP   */
	/* We assume only one group leader, in position 0 */

	else {
		if (pe_ctl->events[0].group_leader_fd!=-1) {
			PAPIERROR("Was expecting group leader!\n");
		}

		/* one read on the leader returns all counts in the group */
		ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer,
			sizeof ( papi_pe_buffer ) );

		if ( ret == -1 ) {
			PAPIERROR("read returned an error: ", strerror( errno ));
			return PAPI_ESYS;
		}

		/* we read 1 64-bit value (number of events) then */
		/* num_events more 64-bit values that hold the counts */
		if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
			PAPIERROR("Error! short read!\n");
			return PAPI_ESYS;
		}

		SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
			pe_ctl->events[0].event_fd,
			(long)pe_ctl->tid, pe_ctl->cpu, ret);
		{
			int j;
			for(j=0;j<ret/8;j++) {
				SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
			}
		}

		/* Make sure the kernel agrees with how many events we have */
		if (papi_pe_buffer[0]!=pe_ctl->num_events) {
			PAPIERROR("Error!  Wrong number of events!\n");
			return PAPI_ESYS;
		}

		/* put the count values in their proper location */
		for(i=0;i<papi_pe_buffer[0];i++) {
			pe_ctl->counts[i] = papi_pe_buffer[1+i];
		}
	}

	/* point PAPI to the values we read */
	*events = pe_ctl->counts;

	return PAPI_OK;
}
825 
/* Start counting events */
/* NOTE(review): the signature line (original 828) was lost in          */
/* extraction; presumably _peu_start( hwd_context_t *ctx,               */
/* hwd_control_state_t *ctl ) -- confirm.                               */
int
{
	int ret;
	int i;
	int did_something = 0;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* Reset the counters first.  Is this necessary? */
	/* NOTE(review): this calls the core component's _pe_reset rather   */
	/* than this file's _peu_reset -- confirm intended.                 */
	ret = _pe_reset( pe_ctx, pe_ctl );
	if ( ret ) {
		return ret;
	}

	/* Enable all of the group leaders */
	/* All group leaders have a group_leader_fd of -1 */
	for( i = 0; i < pe_ctl->num_events; i++ ) {
		if (pe_ctl->events[i].group_leader_fd == -1) {
			SUBDBG("ioctl(enable): fd: %d\n", pe_ctl->events[i].event_fd);
			ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_ENABLE, NULL) ;

			/* ioctls always return -1 on failure */
			if (ret == -1) {
				PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n");
				return PAPI_ESYS;
			}

			did_something++;
		}
	}

	if (!did_something) {
		PAPIERROR("Did not enable any counters.\n");
		return PAPI_EBUG;
	}

	pe_ctx->state |= PERF_EVENTS_RUNNING;

	return PAPI_OK;

}
869 
/* Stop all of the counters */
/* NOTE(review): the signature line (original 872) was lost in          */
/* extraction; presumably _peu_stop( hwd_context_t *ctx,                */
/* hwd_control_state_t *ctl ) -- confirm.                               */
int
{

	int ret;
	int i;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = ( pe_control_t *) ctl;

	/* Just disable the group leaders */
	/* (disabling a leader stops the whole group)        */
	for ( i = 0; i < pe_ctl->num_events; i++ ) {
		if ( pe_ctl->events[i].group_leader_fd == -1 ) {
			ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_DISABLE, NULL);
			if ( ret == -1 ) {
				PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
					"returned error, Linux says: %s",
					pe_ctl->events[i].event_fd, strerror( errno ) );
				return PAPI_EBUG;
			}
		}
	}

	pe_ctx->state &= ~PERF_EVENTS_RUNNING;

	return PAPI_OK;
}
897 
898 /* Set various options on a control state */
899 int
900 _peu_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
901 {
902  int ret;
903  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
904  pe_control_t *pe_ctl = NULL;
905 
906  switch ( code ) {
907  case PAPI_MULTIPLEX:
908  pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
909 
910  pe_ctl->multiplexed = 1;
911  ret = _peu_update_control_state( pe_ctl, NULL,
912  pe_ctl->num_events, pe_ctx );
913  if (ret != PAPI_OK) {
914  pe_ctl->multiplexed = 0;
915  }
916  return ret;
917 
918  case PAPI_ATTACH:
919  pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
920 
921  pe_ctl->tid = option->attach.tid;
922 
923  /* If events have been already been added, something may */
924  /* have been done to the kernel, so update */
925  ret =_peu_update_control_state( pe_ctl, NULL,
926  pe_ctl->num_events, pe_ctx);
927 
928  return ret;
929 
930  case PAPI_DETACH:
931  pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );
932 
933  pe_ctl->tid = 0;
934  return PAPI_OK;
935 
936  case PAPI_CPU_ATTACH:
937  pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
938 
939  /* this tells the kernel not to count for a thread */
940  /* should we warn if we try to set both? perf_event */
941  /* will reject it. */
942  pe_ctl->tid = -1;
943 
944  pe_ctl->cpu = option->cpu.cpu_num;
945 
946  return PAPI_OK;
947 
948  case PAPI_DOMAIN:
949  pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
950 
951  /* looks like we are allowed, so set counting domain */
952  return _pe_set_domain( pe_ctl, option->domain.domain );
953 
954  case PAPI_GRANUL:
955  pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );
956 
957  /* FIXME: we really don't support this yet */
958 
959  switch ( option->granularity.granularity ) {
960  case PAPI_GRN_PROCG:
961  case PAPI_GRN_SYS_CPU:
962  case PAPI_GRN_PROC:
963  return PAPI_ECMP;
964 
965  /* Currently we only support thread and CPU granularity */
966  case PAPI_GRN_SYS:
967  pe_ctl->granularity=PAPI_GRN_SYS;
968  break;
969 
970  case PAPI_GRN_THR:
971  pe_ctl->granularity=PAPI_GRN_THR;
972  break;
973 
974 
975  default:
976  return PAPI_EINVAL;
977  }
978  return PAPI_OK;
979 
980  case PAPI_INHERIT:
981  pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
982 
983  if (option->inherit.inherit) {
984  /* children will inherit counters */
985  pe_ctl->inherit = 1;
986  } else {
987  /* children won't inherit counters */
988  pe_ctl->inherit = 0;
989  }
990  return PAPI_OK;
991 
992  case PAPI_DATA_ADDRESS:
993  return PAPI_ENOSUPP;
994 
995  case PAPI_INSTR_ADDRESS:
996  return PAPI_ENOSUPP;
997 
998  case PAPI_DEF_ITIMER:
999  return PAPI_ENOSUPP;
1000 
1001  case PAPI_DEF_MPX_NS:
1002  return PAPI_ENOSUPP;
1003 
1004  case PAPI_DEF_ITIMER_NS:
1005  return PAPI_ENOSUPP;
1006 
1007  default:
1008  return PAPI_ENOSUPP;
1009  }
1010 }
1011 
1012 
/* Enumerate the available uncore native events. */
int
_peu_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
{
	/* A disabled component exposes no events */
	if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;


	/* NOTE(review): the final call argument (original line 1021, the */
	/* uncore native event table) was lost in extraction.             */
	return _peu_libpfm4_ntv_enum_events(PapiEventCode, modifier,
}
1023 
/* Map an uncore event name to its native event code. */
int
_peu_ntv_name_to_code( char *name, unsigned int *event_code) {

	/* A disabled component exposes no events */
	if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;

	/* NOTE(review): the final call argument (original line 1030, the */
	/* uncore native event table) was lost in extraction.             */
	return _peu_libpfm4_ntv_name_to_code(name,event_code,
}
1032 
/* Map a native event code back to its name (at most len bytes). */
int
_peu_ntv_code_to_name(unsigned int EventCode,
		char *ntv_name, int len) {

	/* A disabled component exposes no events */
	if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;

	/* NOTE(review): the final call argument (original line 1041, the */
	/* uncore native event table) was lost in extraction.             */
	return _peu_libpfm4_ntv_code_to_name(EventCode,
		ntv_name, len,
}
1043 
/* Fetch the description string for a native event code. */
int
_peu_ntv_code_to_descr( unsigned int EventCode,
		char *ntv_descr, int len) {

	/* A disabled component exposes no events */
	if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;

	/* NOTE(review): the final call argument (original line 1051, the */
	/* uncore native event table) was lost in extraction.             */
	return _peu_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len,
}
1053 
/* Fill a PAPI_event_info_t for a native event code. */
int
_peu_ntv_code_to_info(unsigned int EventCode,
		PAPI_event_info_t *info) {

	/* A disabled component exposes no events */
	if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;

	/* NOTE(review): the final call argument (original line 1061, the */
	/* uncore native event table) was lost in extraction.             */
	return _peu_libpfm4_ntv_code_to_info(EventCode, info,
}
1063 
/* Our component vector: ties this component's entry points and         */
/* capabilities into the PAPI framework.                                */

papi_vector_t _perf_event_uncore_vector = {
  .cmp_info = {
	/* component information (unspecified values initialized to 0) */
	.name = "perf_event_uncore",
	.short_name = "peu",
	.version = "5.0",
	.description = "Linux perf_event CPU uncore and northbridge",

	/* Uncore counts are not per-thread, so the defaults are the */
	/* all-domains / system-wide settings.                        */
	.default_domain = PAPI_DOM_ALL,
	.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
	.default_granularity = PAPI_GRN_SYS,
	.available_granularities = PAPI_GRN_SYS,

	.num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS,

	/* component specific cmp_info initializations */
	.fast_virtual_timer = 0,
	.attach = 1,
	.attach_must_ptrace = 1,
	.cpu = 1,
	.inherit = 1,
	.cntr_umasks = 1,

  },

  /* sizes of framework-opaque component-private structures */
  .size = {
	.context = sizeof ( pe_context_t ),
	.control_state = sizeof ( pe_control_t ),
	.reg_value = sizeof ( int ),
	.reg_alloc = sizeof ( int ),
  },

  /* function pointers in this component */
  .init_component = _peu_init_component,
  .shutdown_component = _peu_shutdown_component,
  .init_thread = _peu_init_thread,
  .init_control_state = _peu_init_control_state,
  .start = _peu_start,
  .stop = _peu_stop,
  .read = _peu_read,
  .shutdown_thread = _peu_shutdown_thread,
  .ctl = _peu_ctl,
  .update_control_state = _peu_update_control_state,
  .set_domain = _peu_set_domain,
  .reset = _peu_reset,
  .write = _peu_write,

  /* from counter name mapper */
  .ntv_enum_events = _peu_ntv_enum_events,
  .ntv_name_to_code = _peu_ntv_name_to_code,
  .ntv_code_to_name = _peu_ntv_code_to_name,
  .ntv_code_to_descr = _peu_ntv_code_to_descr,
  .ntv_code_to_info = _peu_ntv_code_to_info,
};
1121 
1122 
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:625
i inherit inherit
int _peu_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
ssize_t read(int fd, void *buf, size_t count)
Definition: appio.c:225
memset(eventId, 0, size)
int _peu_ntv_name_to_code(char *name, unsigned int *event_code)
static int open_pe_events(pe_context_t *ctx, pe_control_t *ctl)
long long counts[PERF_EVENT_MAX_MPX_COUNTERS]
_papi_int_inherit_t inherit
int errno
int close(int fd)
Definition: appio.c:175
#define PAPI_ENOMEM
Definition: fpapi.h:107
#define PAPI_GRN_SYS_CPU
Definition: fpapi.h:72
int _peu_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len)
#define PAPI_CPU_ATTACH
Definition: papi.h:455
int _pe_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: perf_event.c:856
int _peu_shutdown_component(void)
#define PERF_EVENT_MAX_MPX_COUNTERS
Definition: perf_event_lib.h:5
EventSetInfo_t * ESI
unsigned int granularity
long long flags
Definition: iozone.c:12330
#define PAPI_DEF_ITIMER_NS
Definition: papi.h:453
EventSetInfo_t * ESI
int _papi_libpfm4_init(papi_vector_t *my_vector)
int _pe_set_domain(hwd_control_state_t *ctl, int domain)
Definition: perf_event.c:817
static int map_perf_event_errors_to_papi(int perf_event_error)
#define PAPI_INSTR_ADDRESS
Definition: papi.h:451
int _peu_set_domain(hwd_control_state_t *ctl, int domain)
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
#define PAPI_DOM_ALL
Definition: fpapi.h:25
int _peu_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
cpu
Definition: iozone.c:3872
int _peu_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *from)
int default_granularity
Definition: papi.h:641
#define PAPI_ENOEVNT
Definition: fpapi.h:112
#define PAPI_DATA_ADDRESS
Definition: papi.h:450
papi_vector_t _perf_event_uncore_vector
#define PAPI_EPERM
Definition: fpapi.h:120
EventSetInfo_t * ESI
#define PERF_EVENTS_OPENED
pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS]
static int close_pe_events(pe_context_t *ctx, pe_control_t *ctl)
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
#define PAPI_ENOCMP
Definition: fpapi.h:122
#define PAPI_GRN_SYS
Definition: fpapi.h:71
fclose(thread_wqfd)
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
#define PAPI_GRN_PROC
Definition: fpapi.h:69
int _peu_libpfm4_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info, struct native_event_table_t *event_table)
void
Definition: iozone.c:18627
int _peu_libpfm4_ntv_enum_events(unsigned int *PapiEventCode, int modifier, struct native_event_table_t *event_table)
return PAPI_EINVAL
Definition: linux-nvml.c:408
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
int _peu_libpfm4_init(papi_vector_t *my_vector, struct native_event_table_t *event_table, int pmu_type)
int _peu_ntv_enum_events(unsigned int *PapiEventCode, int modifier)
#define PAPI_INHERIT
Definition: papi.h:456
Return codes and api definitions.
uint32_t nr_mmap_pages
FILE * fff[MAX_EVENTS]
unsigned int domain
int multiplex(void)
Definition: multiplex.c:35
_papi_int_attach_t attach
long long ret
Definition: iozone.c:1346
unsigned long tid
_papi_int_cpu_t cpu
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
int _papi_libpfm4_shutdown(void)
#define PAPI_GRN_PROCG
Definition: fpapi.h:70
struct native_event_table_t uncore_native_event_table
#define PAPI_DOM_SUPERVISOR
Definition: fpapi.h:24
static int pid
int _peu_libpfm4_ntv_name_to_code(char *name, unsigned int *event_code, struct native_event_table_t *event_table)
static int cidx
Definition: event_info.c:40
int _peu_libpfm4_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len, struct native_event_table_t *event_table)
#define PAPI_ECMP
Definition: fpapi.h:109
static int native
Definition: event_info.c:39
int _peu_libpfm4_setup_counters(struct perf_event_attr *attr, int event, struct native_event_table_t *event_table)
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
_papi_int_granularity_t granularity
int _peu_shutdown_thread(hwd_context_t *ctx)
EventSetInfo_t * ESI
int _peu_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
void PAPIERROR(char *format,...)
unsigned int multiplexed
#define PAPI_DOMAIN
Definition: fpapi.h:50
char events[MAX_EVENTS][BUFSIZ]
int _peu_init_control_state(hwd_control_state_t *ctl)
struct native_event_table_t * event_table
#define PAPI_ATTACH
Definition: fpapi.h:62
int _peu_init_component(int cidx)
EventSetInfo_t * ESI
#define PAPI_GRANUL
Definition: fpapi.h:52
_papi_int_multiplex_t multiplex
#define PERF_EVENTS_RUNNING
#define PAPI_DEF_MPX_NS
Definition: fpapi.h:53
#define PAPI_ESYS
Definition: fpapi.h:108
int _peu_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
again struct sockaddr sizeof(struct sockaddr_in))
#define PAPI_DETACH
Definition: fpapi.h:66
int _peu_init_thread(hwd_context_t *hwd_ctx)
int _peu_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
unsigned int cpu_num
#define PMU_TYPE_UNCORE
#define PAPI_EBUG
Definition: fpapi.h:111
#define PAPI_DEF_ITIMER
Definition: papi.h:452
char * name
Definition: iozone.c:23648
struct perf_event_attr attr
int
Definition: iozone.c:18528
int _peu_libpfm4_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len, struct native_event_table_t *event_table)
static unsigned int get_read_format(unsigned int multiplex, unsigned int inherit, int format_group)
int our_cidx
Definition: perf_event.c:60
int _peu_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
int _peu_libpfm4_shutdown(struct native_event_table_t *event_table)
unsigned int inherit
struct papi_vectors * _papi_hwd[]
_papi_int_domain_t domain
static long sys_perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
int _peu_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len)
#define PAPI_DOM_USER
Definition: fpapi.h:21
#define READ_BUFFER_SIZE
EventSetInfo_t * ESI
hwd_control_state_t * ctl_state
int _peu_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
#define PAPI_GRN_THR
Definition: fpapi.h:67
#define PAPI_MULTIPLEX
Definition: fpapi.h:48