PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
perfctr-x86.c File Reference
Include dependency graph for perfctr-x86.c:

Go to the source code of this file.

Macros

#define P4_VEC   "SSE"
 
#define P4_FPU   " X87 SSE_DP"
 
#define AMD_FPU   "SPECULATIVE"
 
#define P4_REPLAY_REAL_MASK   0x00000003
 

Functions

int _perfctr_init_component (int)
 
int _perfctr_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
void _perfctr_dispatch_timer (int signal, hwd_siginfo_t *si, void *context)
 
int _perfctr_init_thread (hwd_context_t *ctx)
 
int _perfctr_shutdown_thread (hwd_context_t *ctx)
 
static int is_pentium4 (void)
 
static void print_alloc (X86_reg_alloc_t *a)
 
void print_control (const struct perfctr_cpu_control *control)
 
static int _x86_init_control_state (hwd_control_state_t *ptr)
 
int _x86_set_domain (hwd_control_state_t *cntrl, int domain)
 
static int _bpt_map_avail (hwd_reg_alloc_t *dst, int ctr)
 
static void _bpt_map_set (hwd_reg_alloc_t *dst, int ctr)
 
static int _bpt_map_exclusive (hwd_reg_alloc_t *dst)
 
static int _bpt_map_shared (hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
 
static void _bpt_map_preempt (hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
 
static void _bpt_map_update (hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
 
static int _x86_allocate_registers (EventSetInfo_t *ESI)
 
static void clear_cs_events (hwd_control_state_t *this_state)
 
static int _x86_update_control_state (hwd_control_state_t *this_state, NativeInfo_t *native, int count, hwd_context_t *ctx)
 
static int _x86_start (hwd_context_t *ctx, hwd_control_state_t *state)
 
static int _x86_stop (hwd_context_t *ctx, hwd_control_state_t *state)
 
static int _x86_read (hwd_context_t *ctx, hwd_control_state_t *spc, long long **dp, int flags)
 
static int _x86_reset (hwd_context_t *ctx, hwd_control_state_t *cntrl)
 
static void swap_events (EventSetInfo_t *ESI, struct hwd_pmc_control *contr, int cntr1, int cntr2)
 
static int _x86_set_overflow (EventSetInfo_t *ESI, int EventIndex, int threshold)
 
static int _x86_stop_profiling (ThreadInfo_t *master, EventSetInfo_t *ESI)
 
static int _pfm_get_counter_info (unsigned int event, unsigned int *selector, int *code)
 
int _papi_libpfm_ntv_code_to_bits_perfctr (unsigned int EventCode, hwd_register_t *newbits)
 

Variables

papi_mdi_t _papi_hwi_system_info
 
papi_vector_t _perfctr_vector
 
pentium4_escr_reg_t pentium4_escrs []
 
pentium4_cccr_reg_t pentium4_cccrs []
 
pentium4_event_t pentium4_events []
 
static pentium4_replay_regs_t p4_replay_regs []
 
static int pfm2intel []
 

Macro Definition Documentation

#define AMD_FPU   "SPECULATIVE"

Definition at line 72 of file perfctr-x86.c.

#define P4_FPU   " X87 SSE_DP"

Definition at line 61 of file perfctr-x86.c.

#define P4_REPLAY_REAL_MASK   0x00000003

Definition at line 903 of file perfctr-x86.c.

#define P4_VEC   "SSE"

Definition at line 51 of file perfctr-x86.c.

Function Documentation

static int _bpt_map_avail ( hwd_reg_alloc_t *dst,
int  ctr 
)
static

Definition at line 266 of file perfctr-x86.c.

267 {
268  return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
269 }
static int _bpt_map_exclusive ( hwd_reg_alloc_t *dst)
static

Definition at line 295 of file perfctr-x86.c.

296 {
297  return ( dst->ra_rank == 1 );
298 }
static void _bpt_map_preempt ( hwd_reg_alloc_t *dst,
hwd_reg_alloc_t *src 
)
static

Definition at line 343 of file perfctr-x86.c.

344 {
345  int i;
346  unsigned shared;
347 
348  if ( is_pentium4() ) {
349 #ifdef DEBUG
350  SUBDBG( "src, dst\n" );
351  print_alloc( src );
352  print_alloc( dst );
353 #endif
354 
355  /* check for a pebs conflict */
356  /* pebs enables must both be non-zero */
357  i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
358  /* and not equal to each other */
359  ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
360  /* same for pebs_matrix_vert */
361  ( ( dst->ra_bits.pebs_matrix_vert &&
362  src->ra_bits.pebs_matrix_vert )
363  && ( dst->ra_bits.pebs_matrix_vert !=
364  src->ra_bits.pebs_matrix_vert ) ) );
365  if ( i ) {
366  SUBDBG( "pebs conflict! clearing selector\n" );
367  dst->ra_selector = 0;
368  return;
369  } else {
370  /* remove counters referenced by any shared escrs */
371  if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
372  ( ( int ) dst->ra_escr[0] != -1 ) ) {
373  dst->ra_selector &= ~dst->ra_bits.counter[0];
374  dst->ra_escr[0] = -1;
375  }
376  if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
377  ( ( int ) dst->ra_escr[1] != -1 ) ) {
378  dst->ra_selector &= ~dst->ra_bits.counter[1];
379  dst->ra_escr[1] = -1;
380  }
381 
382  /* remove any remaining shared counters */
383  shared = ( dst->ra_selector & src->ra_selector );
384  if ( shared )
385  dst->ra_selector ^= shared;
386  }
387  /* recompute rank */
388  for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
389  if ( dst->ra_selector & ( 1 << i ) )
390  dst->ra_rank++;
391 #ifdef DEBUG
392  SUBDBG( "new dst\n" );
393  print_alloc( dst );
394 #endif
395  } else {
396  shared = dst->ra_selector & src->ra_selector;
397  if ( shared )
398  dst->ra_selector ^= shared;
399  for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
400  if ( dst->ra_selector & ( 1 << i ) )
401  dst->ra_rank++;
402  }
403 }
static void print_alloc(X86_reg_alloc_t *a)
Definition: perfctr-x86.c:87
#define MAX_COUNTERS
Definition: perfctr-x86.h:8
static int is_pentium4(void)
Definition: perfctr-x86.c:75
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the call graph for this function:

static void _bpt_map_set ( hwd_reg_alloc_t *dst,
int  ctr 
)
static

Definition at line 275 of file perfctr-x86.c.

276 {
277  dst->ra_selector = ( unsigned int ) ( 1 << ctr );
278  dst->ra_rank = 1;
279 
280  if ( is_pentium4() ) {
281  /* Pentium 4 requires that both an escr and a counter are selected.
282  Find which counter mask contains this counter.
283  Set the opposite escr to empty (-1) */
284  if ( dst->ra_bits.counter[0] & dst->ra_selector )
285  dst->ra_escr[1] = -1;
286  else
287  dst->ra_escr[0] = -1;
288  }
289 }
static int is_pentium4(void)
Definition: perfctr-x86.c:75
int
Definition: iozone.c:18528

Here is the call graph for this function:

static int _bpt_map_shared ( hwd_reg_alloc_t *dst,
hwd_reg_alloc_t *src 
)
static

Definition at line 305 of file perfctr-x86.c.

306 {
307  if ( is_pentium4() ) {
308  int retval1, retval2;
309  /* Pentium 4 needs to check for conflict of both counters and esc registers */
310  /* selectors must share bits */
311  retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
312  /* or escrs must equal each other and not be set to -1 */
313  ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
314  ( ( int ) dst->ra_escr[0] != -1 ) ) ||
315  ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
316  ( ( int ) dst->ra_escr[1] != -1 ) ) );
317  /* Pentium 4 also needs to check for conflict on pebs registers */
318  /* pebs enables must both be non-zero */
319  retval2 =
320  ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
321  /* and not equal to each other */
322  ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
323  /* same for pebs_matrix_vert */
324  ( ( dst->ra_bits.pebs_matrix_vert &&
325  src->ra_bits.pebs_matrix_vert ) &&
326  ( dst->ra_bits.pebs_matrix_vert !=
327  src->ra_bits.pebs_matrix_vert ) ) );
328  if ( retval2 ) {
329  SUBDBG( "pebs conflict!\n" );
330  }
331  return ( retval1 | retval2 );
332  }
333 
334  return ( int ) ( dst->ra_selector & src->ra_selector );
335 }
static int is_pentium4(void)
Definition: perfctr-x86.c:75
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int
Definition: iozone.c:18528

Here is the call graph for this function:

static void _bpt_map_update ( hwd_reg_alloc_t *dst,
hwd_reg_alloc_t *src 
)
static

Definition at line 406 of file perfctr-x86.c.

407 {
408  dst->ra_selector = src->ra_selector;
409 
410  if ( is_pentium4() ) {
411  dst->ra_escr[0] = src->ra_escr[0];
412  dst->ra_escr[1] = src->ra_escr[1];
413  }
414 }
static int is_pentium4(void)
Definition: perfctr-x86.c:75

Here is the call graph for this function:

int _papi_libpfm_ntv_code_to_bits_perfctr ( unsigned int  EventCode,
hwd_register_t *newbits 
)

Definition at line 1015 of file perfctr-x86.c.

1017 {
1018  unsigned int event, umask;
1019 
1020  X86_register_t *bits = (X86_register_t *)newbits;
1021 
1022  if ( is_pentium4() ) {
1023  pentium4_escr_value_t escr_value;
1024  pentium4_cccr_value_t cccr_value;
1025  unsigned int num_masks, replay_mask, unit_masks[12];
1026  unsigned int event_mask;
1027  unsigned int tag_value, tag_enable;
1028  unsigned int i;
1029  int j, escr, cccr, pmd;
1030 
1031  if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
1032  return PAPI_ENOEVNT;
1033 
1034  /* for each allowed escr (1 or 2) find the allowed cccrs.
1035  for each allowed cccr find the pmd index
1036  convert to an intel counter number; or it into bits->counter */
1037  for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
1038  bits->counter[i] = 0;
1039  escr = pentium4_events[event].allowed_escrs[i];
1040  if ( escr < 0 ) {
1041  continue;
1042  }
1043 
1044  bits->escr[i] = escr;
1045 
1046  for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
1047  cccr = pentium4_escrs[escr].allowed_cccrs[j];
1048  if ( cccr < 0 ) {
1049  continue;
1050  }
1051 
1052  pmd = pentium4_cccrs[cccr].pmd;
1053  bits->counter[i] |= ( 1 << pfm2intel[pmd] );
1054  }
1055  }
1056 
1057  /* if there's only one valid escr, copy the values */
1058  if ( escr < 0 ) {
1059  bits->escr[1] = bits->escr[0];
1060  bits->counter[1] = bits->counter[0];
1061  }
1062 
1063  /* Calculate the event-mask value. Invalid masks
1064  * specified by the caller are ignored. */
1065  tag_value = 0;
1066  tag_enable = 0;
1067  event_mask = _pfm_convert_umask( event, umask );
1068 
1069  if ( event_mask & 0xF0000 ) {
1070  tag_enable = 1;
1071  tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
1072  }
1073 
1074  event_mask &= 0x0FFFF; /* mask off possible tag bits */
1075 
1076  /* Set up the ESCR and CCCR register values. */
1077  escr_value.val = 0;
1078  escr_value.bits.t1_usr = 0; /* controlled by kernel */
1079  escr_value.bits.t1_os = 0; /* controlled by kernel */
1080 // escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0;
1081 // escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0;
1082  escr_value.bits.tag_enable = tag_enable;
1083  escr_value.bits.tag_value = tag_value;
1084  escr_value.bits.event_mask = event_mask;
1085  escr_value.bits.event_select = pentium4_events[event].event_select;
1086  escr_value.bits.reserved = 0;
1087 
1088  /* initialize the proper bits in the cccr register */
1089  cccr_value.val = 0;
1090  cccr_value.bits.reserved1 = 0;
1091  cccr_value.bits.enable = 1;
1092  cccr_value.bits.escr_select = pentium4_events[event].escr_select;
1093  cccr_value.bits.active_thread = 3;
1094  /* FIXME: This is set to count when either logical
1095  * CPU is active. Need a way to distinguish
1096  * between logical CPUs when HT is enabled.
1097  * the docs say these bits should always
1098  * be set. */
1099  cccr_value.bits.compare = 0;
1100  /* FIXME: What do we do with "threshold" settings? */
1101  cccr_value.bits.complement = 0;
1102  /* FIXME: What do we do with "threshold" settings? */
1103  cccr_value.bits.threshold = 0;
1104  /* FIXME: What do we do with "threshold" settings? */
1105  cccr_value.bits.force_ovf = 0;
1106  /* FIXME: Do we want to allow "forcing" overflow
1107  * interrupts on all counter increments? */
1108  cccr_value.bits.ovf_pmi_t0 = 0;
1109  cccr_value.bits.ovf_pmi_t1 = 0;
1110  /* PMI taken care of by kernel typically */
1111  cccr_value.bits.reserved2 = 0;
1112  cccr_value.bits.cascade = 0;
1113  /* FIXME: How do we handle "cascading" counters? */
1114  cccr_value.bits.overflow = 0;
1115 
1116  /* these flags are always zero, from what I can tell... */
1117  bits->pebs_enable = 0; /* flag for PEBS counting */
1118  bits->pebs_matrix_vert = 0;
1119  /* flag for PEBS_MATRIX_VERT, whatever that is */
1120 
1121  /* ...unless the event is replay_event */
1122  if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
1123  escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
1124  num_masks = prepare_umask( umask, unit_masks );
1125  for ( i = 0; i < num_masks; i++ ) {
1126  replay_mask = unit_masks[i];
1127  if ( replay_mask > 1 && replay_mask < 11 ) {
1128  /* process each valid mask we find */
1129  bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
1130  bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
1131  }
1132  }
1133  }
1134 
1135  /* store the escr and cccr values */
1136  bits->event = escr_value.val;
1137  bits->cccr = cccr_value.val;
1138  bits->ireset = 0; /* I don't really know what this does */
1139  SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val );
1140  } else {
1141 
1142  int ret, code;
1143 
1144  if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
1145  return PAPI_ENOEVNT;
1146 
1147  if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
1148  &code ) ) != PAPI_OK )
1149  return ret;
1150 
1151  bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );
1152 
1153  SUBDBG( "selector: %#x\n", bits->selector );
1154  SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
1155  umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
1156  }
1157 
1158  return PAPI_OK;
1159 }
#define PAPI_ENOEVNT
Definition: papi.h:258
unsigned counter[2]
Definition: perfctr-x86.h:77
static int prepare_umask(unsigned int foo, unsigned int *values)
pentium4_escr_reg_t pentium4_escrs[]
pentium4_cccr_reg_t pentium4_cccrs[]
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
return PAPI_OK
Definition: linux-nvml.c:458
static int is_pentium4(void)
Definition: perfctr-x86.c:75
#define P4_REPLAY_REAL_MASK
Definition: perfctr-x86.c:903
long long ret
Definition: iozone.c:1346
static int _pfm_get_counter_info(unsigned int event, unsigned int *selector, int *code)
Definition: perfctr-x86.c:970
int i
Definition: fileop.c:140
unsigned int selector
Definition: perfctr-x86.h:74
unsigned pebs_enable
Definition: perfctr-x86.h:81
static int pfm2intel[]
Definition: perfctr-x86.c:958
unsigned int _pfm_convert_umask(unsigned int event, unsigned int umask)
unsigned escr[2]
Definition: perfctr-x86.h:78
unsigned cccr
Definition: perfctr-x86.h:79
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char * name
Definition: iozone.c:23648
static pentium4_replay_regs_t p4_replay_regs[]
Definition: perfctr-x86.c:910
int
Definition: iozone.c:18528
unsigned pebs_matrix_vert
Definition: perfctr-x86.h:82
unsigned ireset
Definition: perfctr-x86.h:83
pentium4_event_t pentium4_events[]
long j
Definition: iozone.c:19135
unsigned event
Definition: perfctr-x86.h:80
static int _pfm_decode_native_event(unsigned int EventCode, unsigned int *event, unsigned int *umask)

Here is the call graph for this function:

int _perfctr_ctl ( hwd_context_t *ctx,
int  code,
_papi_int_option_t *option 
)

Definition at line 289 of file perfctr.c.

290 {
291  ( void ) ctx; /*unused */
292  switch ( code ) {
293  case PAPI_DOMAIN:
294  case PAPI_DEFDOM:
295 #if defined(PPC64)
296  return ( _perfctr_vector.
297  set_domain( option->domain.ESI, option->domain.domain ) );
298 #else
299  return ( _perfctr_vector.
300  set_domain( option->domain.ESI->ctl_state,
301  option->domain.domain ) );
302 #endif
303  case PAPI_GRANUL:
304  case PAPI_DEFGRN:
305  return PAPI_ECMP;
306  case PAPI_ATTACH:
307  return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) );
308  case PAPI_DETACH:
309  return ( detach( option->attach.ESI->ctl_state ) );
310  case PAPI_DEF_ITIMER:
311  {
312  /* flags are currently ignored, eventually the flags will be able
313  to specify whether or not we use POSIX itimers (clock_gettimer) */
314  if ( ( option->itimer.itimer_num == ITIMER_REAL ) &&
315  ( option->itimer.itimer_sig != SIGALRM ) )
316  return PAPI_EINVAL;
317  if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) &&
318  ( option->itimer.itimer_sig != SIGVTALRM ) )
319  return PAPI_EINVAL;
320  if ( ( option->itimer.itimer_num == ITIMER_PROF ) &&
321  ( option->itimer.itimer_sig != SIGPROF ) )
322  return PAPI_EINVAL;
323  if ( option->itimer.ns > 0 )
324  option->itimer.ns = round_requested_ns( option->itimer.ns );
325  /* At this point, we assume the user knows what he or
326  she is doing, they may be doing something arch specific */
327  return PAPI_OK;
328  }
329  case PAPI_DEF_MPX_NS:
330  {
331  option->multiplex.ns =
332  ( unsigned long ) round_requested_ns( ( int ) option->multiplex.
333  ns );
334  return ( PAPI_OK );
335  }
336  case PAPI_DEF_ITIMER_NS:
337  {
338  option->itimer.ns = round_requested_ns( option->itimer.ns );
339  return ( PAPI_OK );
340  }
341  default:
342  return ( PAPI_ENOSUPP );
343  }
344 }
static int round_requested_ns(int ns)
Definition: perfctr.c:278
EventSetInfo_t * ESI
#define PAPI_DEF_ITIMER_NS
Definition: papi.h:453
#define PAPI_DEF_MPX_NS
Definition: papi.h:434
#define PAPI_ENOSUPP
Definition: papi.h:269
int ns
Definition: iozone.c:20358
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
_papi_int_itimer_t itimer
_papi_int_attach_t attach
unsigned long tid
static int set_domain(hwd_control_state_t *cntrl, unsigned int domain)
#define PAPI_GRANUL
Definition: papi.h:433
#define PAPI_DEFGRN
Definition: papi.h:432
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
long long
Definition: iozone.c:19827
EventSetInfo_t * ESI
#define PAPI_DETACH
Definition: papi.h:427
#define PAPI_ATTACH
Definition: papi.h:445
#define PAPI_ECMP
Definition: papi.h:254
_papi_int_multiplex_t multiplex
#define PAPI_DOMAIN
Definition: papi.h:431
#define PAPI_DEF_ITIMER
Definition: papi.h:452
#define PAPI_DEFDOM
Definition: papi.h:430
_papi_int_domain_t domain
static int attach(hwd_control_state_t *ctl, unsigned long tid)
Definition: perfctr.c:242
hwd_control_state_t * ctl_state
static int detach(hwd_control_state_t *ctl)
Definition: perfctr.c:271

Here is the call graph for this function:

void _perfctr_dispatch_timer ( int  signal,
hwd_siginfo_t *si,
void *context 
)
int _perfctr_init_component ( int  )

Definition at line 107 of file perfctr.c.

108 {
109  int retval;
110  struct perfctr_info info;
111  char abiv[PAPI_MIN_STR_LEN];
112 
113 #if defined(PERFCTR26)
114  int fd;
115 #else
116  struct vperfctr *dev;
117 #endif
118 
119 #if defined(PERFCTR26)
120  /* Get info from the kernel */
121  /* Use lower level calls per Mikael to get the perfctr info
122  without actually creating a new kernel-side state.
123  Also, close the fd immediately after retrieving the info.
124  This is much lighter weight and doesn't reserve the counter
125  resources. Also compatible with perfctr 2.6.14.
126  */
127  fd = _vperfctr_open( 0 );
128  if ( fd < 0 ) {
131  return PAPI_ESYS;
132  }
133  retval = perfctr_info( fd, &info );
134  close( fd );
135  if ( retval < 0 ) {
138  return PAPI_ESYS;
139  }
140 
141  /* copy tsc multiplier to local variable */
142  /* this field appears in perfctr 2.6 and higher */
143  tb_scale_factor = ( long long ) info.tsc_to_cpu_mult;
144 #else
145  /* Opened once for all threads. */
146  if ( ( dev = vperfctr_open( ) ) == NULL ) {
149  return PAPI_ESYS;
150  }
151  SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev );
152 
153  /* Get info from the kernel */
154  retval = vperfctr_info( dev, &info );
155  if ( retval < 0 ) {
158  return ( PAPI_ESYS );
159  }
160  vperfctr_close( dev );
161 #endif
162 
163  /* Fill in what we can of the papi_system_info. */
165  if ( retval != PAPI_OK )
166  return ( retval );
167 
168  /* Setup memory info */
170  ( int ) info.cpu_type );
171  if ( retval )
172  return ( retval );
173 
174  strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" );
175  strcpy( _perfctr_vector.cmp_info.version, "$Revision$" );
176  sprintf( abiv, "0x%08X", info.abi_version );
178  strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version );
182  if ( info.cpu_features & PERFCTR_FEATURE_RDPMC )
184  else
191 #if !defined(PPC64)
192  /* AMD and Intel ia386 processors all support unit mask bits */
194 #endif
195 #if defined(PPC64)
198 #else
200 #endif
203  if ( info.cpu_features & PERFCTR_FEATURE_PCINT )
205  else
207  SUBDBG( "Hardware/OS %s support counter generated interrupts\n",
208  _perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" );
209 
211  PERFCTR_CPU_NAME( &info ) );
212  _papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type;
213 #if defined(PPC64)
215  if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 )
217 #else
219  xlate_cpu_type_to_vendor( info.cpu_type );
220 #endif
221 
222  /* Setup presets last. Some platforms depend on earlier info */
223 #if !defined(PPC64)
224 // retval = setup_p3_vector_table(vtable);
225  if ( !retval )
226  retval = _papi_libpfm_init(&_perfctr_vector, cidx );
227 #else
228  /* Setup native and preset events */
229 // retval = ppc64_setup_vector_table(vtable);
230  if ( !retval )
232  if ( !retval )
233  retval = setup_ppc64_presets( info.cpu_type, cidx );
234 #endif
235  if ( retval )
236  return ( retval );
237 
238  return ( PAPI_OK );
239 }
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:626
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
int _papi_libpfm_init(papi_vector_t *my_vector, int cidx)
int available_granularities
Definition: papi.h:643
int close(int fd)
Definition: appio.c:175
#define PERFCTR_CPU_NRCTRS
Definition: perfctr.c:50
unsigned int attach
Definition: papi.h:659
int default_granularity
Definition: papi.h:642
static int xlate_cpu_type_to_vendor(unsigned perfctr_cpu_type)
Definition: perfctr.c:55
#define PAPI_DOM_KERNEL
Definition: papi.h:298
int fd
Definition: iozone.c:1291
return PAPI_OK
Definition: linux-nvml.c:458
long long tb_scale_factor
Definition: perfctr.c:104
#define PAPI_DOM_USER
Definition: papi.h:296
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
unsigned int cntr_umasks
Definition: papi.h:665
char kernel_version[PAPI_MIN_STR_LEN]
Definition: papi.h:632
#define VOPEN_ERROR
Definition: perfctr-x86.h:62
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
papi_os_vector_t _papi_os_vector
Definition: aix.c:1288
unsigned int fast_real_timer
Definition: papi.h:657
unsigned int fast_virtual_timer
Definition: papi.h:658
#define PAPI_VENDOR_IBM
Definition: papi.h:348
unsigned int attach_must_ptrace
Definition: papi.h:660
static int cidx
Definition: event_info.c:40
unsigned int fast_counter_read
Definition: papi.h:656
#define PAPI_ESYS
Definition: papi.h:253
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
long long
Definition: iozone.c:19827
#define PAPI_GRN_THR
Definition: papi.h:360
unsigned int hardware_intr
Definition: papi.h:649
char vendor_string[PAPI_MAX_STR_LEN]
Definition: papi.h:785
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:57
strcpy(filename, default_filename)
PAPI_hw_info_t hw_info
int(* get_system_info)(papi_mdi_t *mdi)
Definition: papi_vector.h:68
int vendor
Definition: papi.h:784
int model
Definition: papi.h:786
int
Definition: iozone.c:18528
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
char version[PAPI_MIN_STR_LEN]
Definition: papi.h:630
int perfctr_ppc64_setup_native_table()
Definition: ppc64_events.c:73
char support_version[PAPI_MIN_STR_LEN]
Definition: papi.h:631
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
char model_string[PAPI_MAX_STR_LEN]
Definition: papi.h:787
int setup_ppc64_presets(int cputype)
#define VINFO_ERROR
Definition: perfctr-x86.h:64
#define PAPI_DOM_SUPERVISOR
Definition: papi.h:300
#define PERFCTR_CPU_NAME
Definition: perfctr.c:49
ssize_t retval
Definition: libasync.c:338
int(* get_memory_info)(PAPI_hw_info_t *, int)
Definition: papi_vector.h:69
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")

Here is the call graph for this function:

int _perfctr_init_thread ( hwd_context_t *ctx)

Definition at line 380 of file perfctr.c.

381 {
382  struct vperfctr_control tmp;
383  int error;
384 
385  /* Initialize our thread/process pointer. */
386  if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) {
387 #ifdef VPERFCTR_OPEN_CREAT_EXCL
388  /* New versions of perfctr have this, which allows us to
389  get a previously created context, i.e. one created after
390  a fork and now we're inside a new process that has been exec'd */
391  if ( errno ) {
392  if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) {
393  return PAPI_ESYS;
394  }
395  } else {
396  return PAPI_ESYS;
397  }
398 #else
399  return PAPI_ESYS;
400 #endif
401  }
402  SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr );
403 
404  /* Initialize the per thread/process virtualized TSC */
405  memset( &tmp, 0x0, sizeof ( tmp ) );
406  tmp.cpu_control.tsc_on = 1;
407 
408 #ifdef VPERFCTR_CONTROL_CLOEXEC
409  tmp.flags = VPERFCTR_CONTROL_CLOEXEC;
410  SUBDBG( "close on exec\t\t\t%u\n", tmp.flags );
411 #endif
412 
413  /* Start the per thread/process virtualized TSC */
414  error = vperfctr_control( ctx->perfctr, &tmp );
415  if ( error < 0 ) {
416  SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n",
417  error );
418  return PAPI_ESYS;
419  }
420 
421  return PAPI_OK;
422 }
memset(eventId, 0, size)
int errno
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_ESYS
Definition: papi.h:253
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long tmp
Definition: iozone.c:12031

Here is the call graph for this function:

int _perfctr_shutdown_thread ( hwd_context_t *ctx)

Definition at line 428 of file perfctr.c.

429 {
430 #ifdef DEBUG
431  int retval = vperfctr_unlink( ctx->perfctr );
432  SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr,
433  retval );
434 #else
435  vperfctr_unlink( ctx->perfctr );
436 #endif
437  vperfctr_close( ctx->perfctr );
438  SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr );
439  memset( ctx, 0x0, sizeof ( hwd_context_t ) );
440  return ( PAPI_OK );
441 }
memset(eventId, 0, size)
return PAPI_OK
Definition: linux-nvml.c:458
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
ssize_t retval
Definition: libasync.c:338

Here is the call graph for this function:

static int _pfm_get_counter_info ( unsigned int  event,
unsigned int *selector,
int *code 
)
static

Definition at line 970 of file perfctr-x86.c.

971 {
972  pfmlib_regmask_t cnt, impl;
973  unsigned int num;
974  unsigned int i, first = 1;
975  int ret;
976 
977  if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
978  PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt,
979  pfm_strerror( ret ) );
980  return PAPI_ESYS;
981  }
982  if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
983  PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) );
984  return PAPI_ESYS;
985  }
986  if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
987  PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl,
988  pfm_strerror( ret ) );
989  return PAPI_ESYS;
990  }
991 
992  *selector = 0;
993  for ( i = 0; num; i++ ) {
994  if ( pfm_regmask_isset( &impl, i ) )
995  num--;
996  if ( pfm_regmask_isset( &cnt, i ) ) {
997  if ( first ) {
998  if ( ( ret =
999  pfm_get_event_code_counter( event, i,
1000  code ) ) !=
1001  PFMLIB_SUCCESS ) {
1002  PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s",
1003  event, i, code, pfm_strerror( ret ) );
1004  return PAPI_ESYS;
1005  }
1006  first = 0;
1007  }
1008  *selector |= 1 << i;
1009  }
1010  }
1011  return PAPI_OK;
1012 }
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
return PAPI_OK
Definition: linux-nvml.c:458
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
void int num
Definition: iozone.c:22151
#define PAPI_ESYS
Definition: papi.h:253
void PAPIERROR(char *format,...)

Here is the call graph for this function:

Here is the caller graph for this function:

static int _x86_allocate_registers ( EventSetInfo_t *ESI)
static

Definition at line 418 of file perfctr-x86.c.

419 {
420  int i, j, natNum;
421  hwd_reg_alloc_t event_list[MAX_COUNTERS];
423 
424  /* Initialize the local structure needed
425  for counter allocation and optimization. */
426  natNum = ESI->NativeCount;
427 
428  if ( is_pentium4() ) {
429  SUBDBG( "native event count: %d\n", natNum );
430  }
431 
432  for ( i = 0; i < natNum; i++ ) {
433  /* retrieve the mapping information about this native event */
434  _papi_libpfm_ntv_code_to_bits( ( unsigned int ) ESI->NativeInfoArray[i].
435  ni_event, &event_list[i].ra_bits );
436 
437  if ( is_pentium4() ) {
438  /* combine counter bit masks for both esc registers into selector */
439  event_list[i].ra_selector =
440  event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
441  counter[1];
442  } else {
443  /* make sure register allocator only looks at legal registers */
444  event_list[i].ra_selector =
445  event_list[i].ra_bits.selector & ALLCNTRS;
446 #ifdef PERFCTR_X86_INTEL_CORE2
448  PERFCTR_X86_INTEL_CORE2 )
449  event_list[i].ra_selector |=
450  ( ( event_list[i].ra_bits.
451  selector >> 16 ) << 2 ) & ALLCNTRS;
452 #endif
453  }
454  /* calculate native event rank, which is no. of counters it can live on */
455  event_list[i].ra_rank = 0;
456  for ( j = 0; j < MAX_COUNTERS; j++ ) {
457  if ( event_list[i].ra_selector & ( 1 << j ) ) {
458  event_list[i].ra_rank++;
459  }
460  }
461 
462  if ( is_pentium4() ) {
463  event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
464  event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
465 #ifdef DEBUG
466  SUBDBG( "i: %d\n", i );
467  print_alloc( &event_list[i] );
468 #endif
469  }
470  }
471  if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) { /* successfully mapped */
472  for ( i = 0; i < natNum; i++ ) {
473 #ifdef PERFCTR_X86_INTEL_CORE2
475  PERFCTR_X86_INTEL_CORE2 )
476  event_list[i].ra_bits.selector = event_list[i].ra_selector;
477 #endif
478 #ifdef DEBUG
479  if ( is_pentium4() ) {
480  SUBDBG( "i: %d\n", i );
481  print_alloc( &event_list[i] );
482  }
483 #endif
484  /* Copy all info about this native event to the NativeInfo struct */
485  ptr = ESI->NativeInfoArray[i].ni_bits;
486  *ptr = event_list[i].ra_bits;
487 
488  if ( is_pentium4() ) {
489  /* The selector contains the counter bit position. Turn it into a number
490  and store it in the first counter value, zeroing the second. */
491  ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
492  ptr->counter[1] = 0;
493  }
494 
495  /* Array order on perfctr is event ADD order, not counter #... */
496  ESI->NativeInfoArray[i].ni_position = i;
497  }
498  return PAPI_OK;
499  } else
500  return PAPI_ECNFLCT;
501 }
static void print_alloc(X86_reg_alloc_t *a)
Definition: perfctr-x86.c:87
static int _papi_bipartite_alloc(hwd_reg_alloc_t *event_list, int count, int cidx)
hwd_register_t * ni_bits
#define MAX_COUNTERS
Definition: perfctr-x86.h:8
return PAPI_OK
Definition: linux-nvml.c:458
static int is_pentium4(void)
Definition: perfctr-x86.c:75
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_ECNFLCT
Definition: papi.h:259
NativeInfo_t * NativeInfoArray
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:57
PAPI_hw_info_t hw_info
#define ALLCNTRS
Definition: perfctr-x86.h:35
int model
Definition: papi.h:786
int _papi_libpfm_ntv_code_to_bits(unsigned int EventCode, hwd_register_t *bits)
long j
Definition: iozone.c:19135
char * ptr
Definition: iozone.c:23586

Here is the call graph for this function:

static int _x86_init_control_state ( hwd_control_state_t *  ptr)
static

Definition at line 119 of file perfctr-x86.c.

120 {
121  int i, def_mode = 0;
122 
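123  if ( is_pentium4() ) {
124  if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
125  def_mode |= ESCR_T0_USR;
126  if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
127  def_mode |= ESCR_T0_OS;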
128 
129  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
130  ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
131  }
132  ptr->control.cpu_control.tsc_on = 1;
133  ptr->control.cpu_control.nractrs = 0;
134  ptr->control.cpu_control.nrictrs = 0;
135 
136 #ifdef VPERFCTR_CONTROL_CLOEXEC
137  ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
138  SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
139 #endif
140  } else {
141 
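142  if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
143  def_mode |= PERF_USR;
144  if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
145  def_mode |= PERF_OS;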
146 
147  ptr->allocated_registers.selector = 0;
148  switch ( _papi_hwi_system_info.hw_info.model ) {
149  case PERFCTR_X86_GENERIC:
150  case PERFCTR_X86_WINCHIP_C6:
151  case PERFCTR_X86_WINCHIP_2:
152  case PERFCTR_X86_VIA_C3:
153  case PERFCTR_X86_INTEL_P5:
154  case PERFCTR_X86_INTEL_P5MMX:
155  case PERFCTR_X86_INTEL_PII:
156  case PERFCTR_X86_INTEL_P6:
157  case PERFCTR_X86_INTEL_PIII:
158 #ifdef PERFCTR_X86_INTEL_CORE
159  case PERFCTR_X86_INTEL_CORE:
160 #endif
161 #ifdef PERFCTR_X86_INTEL_PENTM
162  case PERFCTR_X86_INTEL_PENTM:
163 #endif
164  ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
165  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
166  ptr->control.cpu_control.evntsel[i] |= def_mode;
167  ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
168  }
169  break;
170 #ifdef PERFCTR_X86_INTEL_CORE2
171  case PERFCTR_X86_INTEL_CORE2:
172 #endif
173 #ifdef PERFCTR_X86_INTEL_ATOM
174  case PERFCTR_X86_INTEL_ATOM:
175 #endif
176 #ifdef PERFCTR_X86_INTEL_NHLM
177  case PERFCTR_X86_INTEL_NHLM:
178 #endif
179 #ifdef PERFCTR_X86_INTEL_WSTMR
180  case PERFCTR_X86_INTEL_WSTMR:
181 #endif
182 #ifdef PERFCTR_X86_AMD_K8
183  case PERFCTR_X86_AMD_K8:
184 #endif
185 #ifdef PERFCTR_X86_AMD_K8C
186  case PERFCTR_X86_AMD_K8C:
187 #endif
188 #ifdef PERFCTR_X86_AMD_FAM10H /* this is defined in perfctr 2.6.29 */
189  case PERFCTR_X86_AMD_FAM10H:
190 #endif
191  case PERFCTR_X86_AMD_K7:
192  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
193  ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
194  ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
195  }
196  break;
197  }
198 #ifdef VPERFCTR_CONTROL_CLOEXEC
199  ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
200  SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
201 #endif
202 
203  /* Make sure the TSC is always on */
204  ptr->control.cpu_control.tsc_on = 1;
205  }
206  return ( PAPI_OK );
207 }
#define PAPI_DOM_KERNEL
Definition: papi.h:298
#define PERF_USR
Definition: perfctr-x86.h:57
return PAPI_OK
Definition: linux-nvml.c:458
static int is_pentium4(void)
Definition: perfctr-x86.c:75
#define PAPI_DOM_USER
Definition: papi.h:296
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define ESCR_T0_OS
Definition: perfctr-x86.h:16
int i
Definition: fileop.c:140
#define PERF_OS
Definition: perfctr-x86.h:56
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:57
PAPI_hw_info_t hw_info
int model
Definition: papi.h:786
int
Definition: iozone.c:18528
#define ESCR_T0_USR
Definition: perfctr-x86.h:17
#define PERF_ENABLE
Definition: perfctr-x86.h:52

Here is the call graph for this function:

static int _x86_read ( hwd_context_t *  ctx,
hwd_control_state_t *  spc,
long long **  dp,
int  flags 
)
static

Definition at line 701 of file perfctr-x86.c.

703 {
704  if ( flags & PAPI_PAUSED ) {
705  vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
706  if ( !is_pentium4() ) {
707  unsigned int i = 0;
708  for ( i = 0;
709  i <
710  spc->control.cpu_control.nractrs +
711  spc->control.cpu_control.nrictrs; i++ ) {
712  SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i,
713  spc->state.pmc[i] );
714  }
715  }
716  } else {
717  SUBDBG( "vperfctr_read_ctrs\n" );
718  if ( spc->rvperfctr != NULL ) {
719  rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
720  } else {
721  vperfctr_read_ctrs( ctx->perfctr, &spc->state );
722  }
723  }
724  *dp = ( long long * ) spc->state.pmc;
725 #ifdef DEBUG
726  {
727  if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
728  unsigned int i;
729  if ( is_pentium4() ) {
730  for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
731  SUBDBG( "raw val hardware index %d is %lld\n", i,
732  ( long long ) spc->state.pmc[i] );
733  }
734  } else {
735  for ( i = 0;
736  i <
737  spc->control.cpu_control.nractrs +
738  spc->control.cpu_control.nrictrs; i++ ) {
739  SUBDBG( "raw val hardware index %d is %lld\n", i,
740  ( long long ) spc->state.pmc[i] );
741  }
742  }
743  }
744  }
745 #endif
746  return ( PAPI_OK );
747 }
long long flags
Definition: iozone.c:12330
return PAPI_OK
Definition: linux-nvml.c:458
static int is_pentium4(void)
Definition: perfctr-x86.c:75
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define ISLEVEL(a)
Definition: papi_debug.h:54
#define PAPI_PAUSED
Definition: papi.h:374
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")
#define DEBUG_SUBSTRATE
Definition: papi_debug.h:27

Here is the call graph for this function:

static int _x86_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  cntrl 
)
static

Definition at line 750 of file perfctr-x86.c.

751 {
752  return ( _x86_start( ctx, cntrl ) );
753 }
static int _x86_start(hwd_context_t *ctx, hwd_control_state_t *state)
Definition: perfctr-x86.c:653

Here is the call graph for this function:

int _x86_set_domain ( hwd_control_state_t *  cntrl,
int  domain 
)

Definition at line 210 of file perfctr-x86.c.

211 {
212  int i, did = 0;
213  int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
214 
215  /* Clear the current domain set for this event set */
216  /* We don't touch the Enable bit in this code */
217  if ( is_pentium4() ) {
218  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
219  cntrl->control.cpu_control.evntsel_aux[i] &=
220  ~( ESCR_T0_OS | ESCR_T0_USR );
221  }
222 
223  if ( domain & PAPI_DOM_USER ) {
224  did = 1;
225  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
226  cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
227  }
228  }
229 
230  if ( domain & PAPI_DOM_KERNEL ) {
231  did = 1;
232  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
233  cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
234  }
235  }
236  } else {
237  for ( i = 0; i < num_cntrs; i++ ) {
238  cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
239  }
240 
241  if ( domain & PAPI_DOM_USER ) {
242  did = 1;
243  for ( i = 0; i < num_cntrs; i++ ) {
244  cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
245  }
246  }
247 
248  if ( domain & PAPI_DOM_KERNEL ) {
249  did = 1;
250  for ( i = 0; i < num_cntrs; i++ ) {
251  cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
252  }
253  }
254  }
255 
256  if ( !did )
257  return ( PAPI_EINVAL );
258  else
259  return ( PAPI_OK );
260 }
#define PAPI_DOM_KERNEL
Definition: papi.h:298
#define PERF_USR
Definition: perfctr-x86.h:57
return PAPI_OK
Definition: linux-nvml.c:458
static int is_pentium4(void)
Definition: perfctr-x86.c:75
#define PAPI_DOM_USER
Definition: papi.h:296
return PAPI_EINVAL
Definition: linux-nvml.c:408
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define ESCR_T0_OS
Definition: perfctr-x86.h:16
int i
Definition: fileop.c:140
#define PERF_OS
Definition: perfctr-x86.h:56
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
#define ESCR_T0_USR
Definition: perfctr-x86.h:17

Here is the call graph for this function:

static int _x86_set_overflow ( EventSetInfo_t *  ESI,
int  EventIndex,
int  threshold 
)
static

Definition at line 805 of file perfctr-x86.c.

806 {
807  hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
808  struct hwd_pmc_control *contr = &(ctl->control);
809  int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
810  OVFDBG( "EventIndex=%d\n", EventIndex );
811 
812 #ifdef DEBUG
813  if ( is_pentium4() )
814  print_control( &(contr->cpu_control) );
815 #endif
816 
817  /* The correct event to overflow is EventIndex */
818  ncntrs = _perfctr_vector.cmp_info.num_cntrs;
819  i = ESI->EventInfoArray[EventIndex].pos[0];
820 
821  if ( i >= ncntrs ) {
822  PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
823  return PAPI_EINVAL;
824  }
825 
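826  if ( threshold != 0 ) { /* Set an overflow threshold */
827  retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
828  NEED_CONTEXT,
829  _perfctr_vector.cmp_info.CmpIdx );
830  if ( retval != PAPI_OK )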
831  return ( retval );
832 
833  /* overflow interrupt occurs on the NEXT event after overflow occurs
834  thus we subtract 1 from the threshold. */
835  contr->cpu_control.ireset[i] = ( -threshold + 1 );
836 
837  if ( is_pentium4() )
838  contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
839  else
840  contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;
841 
842  contr->cpu_control.nrictrs++;
843  contr->cpu_control.nractrs--;
844  nricntrs = ( int ) contr->cpu_control.nrictrs;
845  nracntrs = ( int ) contr->cpu_control.nractrs;
846  contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;
847 
848  /* move this event to the bottom part of the list if needed */
849  if ( i < nracntrs )
850  swap_events( ESI, contr, i, nracntrs );
851  OVFDBG( "Modified event set\n" );
852  } else {
853  if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
854  contr->cpu_control.ireset[i] = 0;
855  contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
856  contr->cpu_control.nrictrs--;
857  contr->cpu_control.nractrs++;
858  } else if ( !is_pentium4() &&
859  contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
860  contr->cpu_control.ireset[i] = 0;
861  contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
862  contr->cpu_control.nrictrs--;
863  contr->cpu_control.nractrs++;
864  }
865 
866  nricntrs = ( int ) contr->cpu_control.nrictrs;
867  nracntrs = ( int ) contr->cpu_control.nractrs;
868 
869  /* move this event to the top part of the list if needed */
870  if ( i >= nracntrs )
871  swap_events( ESI, contr, i, nracntrs - 1 );
872 
873  if ( !nricntrs )
874  contr->si_signo = 0;
875 
876  OVFDBG( "Modified event set\n" );
877 
878  retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig );
879  }
880 
881 #ifdef DEBUG
882  if ( is_pentium4() )
883  print_control( &(contr->cpu_control) );
884 #endif
885  OVFDBG( "End of call. Exit code: %d\n", retval );
886  return ( retval );
887 }
#define hwd_pmc_control
Definition: perfctr-x86.h:11
return PAPI_OK
Definition: linux-nvml.c:458
static int is_pentium4(void)
Definition: perfctr-x86.c:75
return PAPI_EINVAL
Definition: linux-nvml.c:408
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
int i
Definition: fileop.c:140
#define OVFDBG(format, args...)
Definition: papi_debug.h:68
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
void PAPIERROR(char *format,...)
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:401
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:441
EventInfo_t * EventInfoArray
int threshold
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
#define PERF_INT_ENABLE
Definition: perfctr-x86.h:53
#define NEED_CONTEXT
Definition: papi_internal.h:97
int
Definition: iozone.c:18528
void print_control(const struct perfctr_cpu_control *control)
Definition: perfctr-x86.c:96
hwd_control_state_t * ctl_state
ssize_t retval
Definition: libasync.c:338
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")
#define CCCR_OVF_PMI_T0
Definition: perfctr-x86.h:18
static void swap_events(EventSetInfo_t *ESI, struct hwd_pmc_control *contr, int cntr1, int cntr2)
Definition: perfctr-x86.c:762

Here is the call graph for this function:

static int _x86_start ( hwd_context_t *  ctx,
hwd_control_state_t *  state 
)
static

Definition at line 653 of file perfctr-x86.c.

654 {
655  int error;
656 #ifdef DEBUG
657  print_control( &state->control.cpu_control );
658 #endif
659 
660  if ( state->rvperfctr != NULL ) {
661  if ( ( error =
662  rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
663  SUBDBG( "rvperfctr_control returns: %d\n", error );
664  PAPIERROR( RCNTRL_ERROR );
665  return ( PAPI_ESYS );
666  }
667  return ( PAPI_OK );
668  }
669 
670  if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) {
671  SUBDBG( "vperfctr_control returns: %d\n", error );
672  PAPIERROR( VCNTRL_ERROR );
673  return ( PAPI_ESYS );
674  }
675  return ( PAPI_OK );
676 }
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_ESYS
Definition: papi.h:253
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
void PAPIERROR(char *format,...)
#define RCNTRL_ERROR
Definition: perfctr-x86.h:66
void print_control(const struct perfctr_cpu_control *control)
Definition: perfctr-x86.c:96
#define VCNTRL_ERROR
Definition: perfctr-x86.h:65

Here is the call graph for this function:

Here is the caller graph for this function:

static int _x86_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  state 
)
static

Definition at line 679 of file perfctr-x86.c.

680 {
681  int error;
682 
683  if ( state->rvperfctr != NULL ) {
684  if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) {
685  PAPIERROR( RCNTRL_ERROR );
686  return ( PAPI_ESYS );
687  }
688  return ( PAPI_OK );
689  }
690 
691  error = vperfctr_stop( ctx->perfctr );
692  if ( error < 0 ) {
693  SUBDBG( "vperfctr_stop returns: %d\n", error );
694  PAPIERROR( VCNTRL_ERROR );
695  return ( PAPI_ESYS );
696  }
697  return ( PAPI_OK );
698 }
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_ESYS
Definition: papi.h:253
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
void PAPIERROR(char *format,...)
#define RCNTRL_ERROR
Definition: perfctr-x86.h:66
#define VCNTRL_ERROR
Definition: perfctr-x86.h:65

Here is the call graph for this function:

static int _x86_stop_profiling ( ThreadInfo_t *  master,
EventSetInfo_t *  ESI 
)
static

Definition at line 890 of file perfctr-x86.c.

891 {
892  ( void ) master; /*unused */
893  ( void ) ESI; /*unused */
894  return ( PAPI_OK );
895 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
static int _x86_update_control_state ( hwd_control_state_t *  this_state,
NativeInfo_t *  native,
int  count,
hwd_context_t *  ctx 
)
static

Definition at line 550 of file perfctr-x86.c.

553 {
554  ( void ) ctx; /*unused */
555  unsigned int i, k, retval = PAPI_OK;
556  hwd_register_t *bits,*bits2;
557  struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;
558 
559  /* clear out the events from the control state */
560  clear_cs_events( this_state );
561 
562  if ( is_pentium4() ) {
563  /* fill the counters we're using */
564  for ( i = 0; i < ( unsigned int ) count; i++ ) {
565  /* dereference the mapping information about this native event */
566  bits = native[i].ni_bits;
567 
568  /* Add counter control command values to eventset */
569  cpu_control->pmc_map[i] = bits->counter[0];
570  cpu_control->evntsel[i] = bits->cccr;
571  cpu_control->ireset[i] = bits->ireset;
572  cpu_control->pmc_map[i] |= FAST_RDPMC;
573  cpu_control->evntsel_aux[i] |= bits->event;
574 
575  /* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
576  Replay_events count L1 and L2 cache events. There is only one of each for
577  the entire eventset. Therefore, there can be only one unique replay_event
578  per eventset. This means L1 and L2 can't be counted together. Which stinks.
579  This conflict should be trapped in the allocation scheme, but we'll test for it
580  here too, just in case. */
581  if ( bits->pebs_enable ) {
582  /* if pebs_enable isn't set, just copy */
583  if ( cpu_control->p4.pebs_enable == 0 ) {
584  cpu_control->p4.pebs_enable = bits->pebs_enable;
585  /* if pebs_enable conflicts, flag an error */
586  } else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
587  SUBDBG
588  ( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
589  retval = PAPI_ECNFLCT;
590  }
591  /* if pebs_enable == bits->pebs_enable, do nothing */
592  }
593  if ( bits->pebs_matrix_vert ) {
594  /* if pebs_matrix_vert isn't set, just copy */
595  if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
596  cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
597  /* if pebs_matrix_vert conflicts, flag an error */
598  } else if ( cpu_control->p4.pebs_matrix_vert !=
599  bits->pebs_matrix_vert ) {
600  SUBDBG
601  ( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
602  retval = PAPI_ECNFLCT;
603  }
604  /* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
605  }
606  }
607  this_state->control.cpu_control.nractrs = count;
608 
609  /* Make sure the TSC is always on */
610  this_state->control.cpu_control.tsc_on = 1;
611 
612 #ifdef DEBUG
613  print_control( &this_state->control.cpu_control );
614 #endif
615  } else {
616  switch ( _papi_hwi_system_info.hw_info.model ) {
617 #ifdef PERFCTR_X86_INTEL_CORE2
618  case PERFCTR_X86_INTEL_CORE2:
619  /* fill the counters we're using */
620  for ( i = 0; i < ( unsigned int ) count; i++ ) {
621  bits2 = native[i].ni_bits;
622  for ( k = 0; k < MAX_COUNTERS; k++ )
623  if ( bits2->selector & ( 1 << k ) ) {
624  break;
625  }
626  if ( k > 1 )
627  this_state->control.cpu_control.pmc_map[i] =
628  ( k - 2 ) | 0x40000000;
629  else
630  this_state->control.cpu_control.pmc_map[i] = k;
631 
632  /* Add counter control command values to eventset */
633  this_state->control.cpu_control.evntsel[i] |=
634  bits2->counter_cmd;
635  }
636  break;
637 #endif
638  default:
639  /* fill the counters we're using */
640  for ( i = 0; i < ( unsigned int ) count; i++ ) {
641  /* Add counter control command values to eventset */
642  bits2 = native[i].ni_bits;
643  this_state->control.cpu_control.evntsel[i] |=
644  bits2->counter_cmd;
645  }
646  }
647  this_state->control.cpu_control.nractrs = ( unsigned int ) count;
648  }
649  return retval;
650 }
hwd_register_t * ni_bits
#define MAX_COUNTERS
Definition: perfctr-x86.h:8
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
int event[MAX_COUNTERS]
Definition: solaris-ultra.h:47
static int is_pentium4(void)
Definition: perfctr-x86.c:75
void
Definition: iozone.c:18627
static void clear_cs_events(hwd_control_state_t *this_state)
Definition: perfctr-x86.c:504
int i
Definition: fileop.c:140
int k
Definition: iozone.c:19136
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_ECNFLCT
Definition: papi.h:259
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:57
PAPI_hw_info_t hw_info
int model
Definition: papi.h:786
int
Definition: iozone.c:18528
#define FAST_RDPMC
Definition: perfctr-x86.h:19
void print_control(const struct perfctr_cpu_control *control)
Definition: perfctr-x86.c:96
ssize_t retval
Definition: libasync.c:338

Here is the call graph for this function:

static void clear_cs_events ( hwd_control_state_t *  this_state)
static

Definition at line 504 of file perfctr-x86.c.

505 {
506  unsigned int i, j;
507 
508  /* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */
509  j = this_state->control.cpu_control.nractrs +
510  this_state->control.cpu_control.nrictrs;
511 
512  /* Remove all counter control command values from eventset. */
513  for ( i = 0; i < j; i++ ) {
514  SUBDBG( "Clearing pmc event entry %d\n", i );
515  if ( is_pentium4() ) {
516  this_state->control.cpu_control.pmc_map[i] = 0;
517  this_state->control.cpu_control.evntsel[i] = 0;
518  this_state->control.cpu_control.evntsel_aux[i] =
519  this_state->control.cpu_control.
520  evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR );
521  } else {
522  this_state->control.cpu_control.pmc_map[i] = i;
523  this_state->control.cpu_control.evntsel[i]
524  = this_state->control.cpu_control.
525  evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR );
526  }
527  this_state->control.cpu_control.ireset[i] = 0;
528  }
529 
530  if ( is_pentium4() ) {
531  /* Clear pebs stuff */
532  this_state->control.cpu_control.p4.pebs_enable = 0;
533  this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
534  }
535 
536  /* clear both a and i counter counts */
537  this_state->control.cpu_control.nractrs = 0;
538  this_state->control.cpu_control.nrictrs = 0;
539 
540 #ifdef DEBUG
541  if ( is_pentium4() )
542  print_control( &this_state->control.cpu_control );
543 #endif
544 }
#define PERF_USR
Definition: perfctr-x86.h:57
static int is_pentium4(void)
Definition: perfctr-x86.c:75
#define ESCR_T0_OS
Definition: perfctr-x86.h:16
int i
Definition: fileop.c:140
#define PERF_OS
Definition: perfctr-x86.h:56
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define ESCR_T0_USR
Definition: perfctr-x86.h:17
void print_control(const struct perfctr_cpu_control *control)
Definition: perfctr-x86.c:96
long j
Definition: iozone.c:19135
#define PERF_ENABLE
Definition: perfctr-x86.h:52

Here is the call graph for this function:

Here is the caller graph for this function:

static int is_pentium4 ( void  )
inline static

Definition at line 75 of file perfctr-x86.c.

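75 static inline int is_pentium4(void) {
76  if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) &&
77  ( _papi_hwi_system_info.hw_info.cpuid_family == 15 ) ) {
78  return 1;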
79  }
80 
81  return 0;
82 
83 }
int cpuid_family
Definition: papi.h:789
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:57
PAPI_hw_info_t hw_info
#define PAPI_VENDOR_INTEL
Definition: papi.h:346
int vendor
Definition: papi.h:784

Here is the caller graph for this function:

static void print_alloc ( X86_reg_alloc_t *  a)
static

Definition at line 87 of file perfctr-x86.c.

88 {
89  SUBDBG( "X86_reg_alloc:\n" );
90  SUBDBG( " selector: %#x\n", a->ra_selector );
91  SUBDBG( " rank: %#x\n", a->ra_rank );
92  SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
93 }
unsigned ra_selector
Definition: perfctr-x86.h:89
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
unsigned ra_rank
Definition: perfctr-x86.h:90
unsigned ra_escr[2]
Definition: perfctr-x86.h:92

Here is the caller graph for this function:

void print_control ( const struct perfctr_cpu_control *  control)

Definition at line 96 of file perfctr-x86.c.

97 {
98  unsigned int i;
99  SUBDBG( "Control used:\n" );
100  SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
101  SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
102  SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );
103 
104  for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
105  if ( control->pmc_map[i] >= 18 ) {
106  SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
107  } else {
108  SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
109  }
110  SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
111  if ( control->ireset[i] ) {
112  SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
113  }
114  }
115 }
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

static void swap_events ( EventSetInfo_t *  ESI,
struct hwd_pmc_control *  contr,
int  cntr1,
int  cntr2 
)
static

Definition at line 762 of file perfctr-x86.c.

764 {
765  unsigned int ui;
766  int si, i, j;
767 
768  for ( i = 0; i < ESI->NativeCount; i++ ) {
769  if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
770  ESI->NativeInfoArray[i].ni_position = cntr2;
771  else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
772  ESI->NativeInfoArray[i].ni_position = cntr1;
773  }
774 
775  for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
776  for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
777  if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
778  ESI->EventInfoArray[i].pos[j] = cntr2;
779  else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
780  ESI->EventInfoArray[i].pos[j] = cntr1;
781  }
782  }
783 
784  ui = contr->cpu_control.pmc_map[cntr1];
785  contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
786  contr->cpu_control.pmc_map[cntr2] = ui;
787 
788  ui = contr->cpu_control.evntsel[cntr1];
789  contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
790  contr->cpu_control.evntsel[cntr2] = ui;
791 
792  if ( is_pentium4() ) {
793  ui = contr->cpu_control.evntsel_aux[cntr1];
794  contr->cpu_control.evntsel_aux[cntr1] =
795  contr->cpu_control.evntsel_aux[cntr2];
796  contr->cpu_control.evntsel_aux[cntr2] = ui;
797  }
798 
799  si = contr->cpu_control.ireset[cntr1];
800  contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
801  contr->cpu_control.ireset[cntr2] = si;
802 }
static int is_pentium4(void)
Definition: perfctr-x86.c:75
int i
Definition: fileop.c:140
NativeInfo_t * NativeInfoArray
EventInfo_t * EventInfoArray
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
long j
Definition: iozone.c:19135

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

papi_mdi_t _papi_hwi_system_info

Definition at line 57 of file papi_internal.c.

papi_vector_t _perfctr_vector

Definition at line 1163 of file perfctr-x86.c.

pentium4_replay_regs_t p4_replay_regs[]
static

Definition at line 910 of file perfctr-x86.c.

pentium4_cccr_reg_t pentium4_cccrs[]
pentium4_escr_reg_t pentium4_escrs[]
pentium4_event_t pentium4_events[]
int pfm2intel[]
static
Initial value:
=
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 }

Definition at line 958 of file perfctr-x86.c.