PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
perfctr-x86.c
Go to the documentation of this file.
1 /*
2 * File: perfctr-x86.c
3 * Author: Brian Sheely
4 * bsheely@eecs.utk.edu
5 * Mods: <your name here>
6 * <your email address>
7 */
8 
9 #include <string.h>
10 #include <linux/unistd.h>
11 
12 #include "papi.h"
13 #include "papi_memory.h"
14 #include "papi_internal.h"
15 #include "perfctr-x86.h"
16 #include "perfmon/pfmlib.h"
17 #include "extras.h"
18 #include "papi_vector.h"
19 #include "papi_libpfm_events.h"
20 
21 #include "papi_preset.h"
22 #include "linux-memory.h"
23 
24 /* Contains source for the Modified Bipartite Allocation scheme */
25 #include "papi_bipartite.h"
26 
27 /* Prototypes for entry points found in perfctr.c */
28 extern int _perfctr_init_component( int );
29 extern int _perfctr_ctl( hwd_context_t * ctx, int code,
30  _papi_int_option_t * option );
31 extern void _perfctr_dispatch_timer( int signal, hwd_siginfo_t * si,
32  void *context );
33 
34 extern int _perfctr_init_thread( hwd_context_t * ctx );
35 extern int _perfctr_shutdown_thread( hwd_context_t * ctx );
36 
37 #include "linux-common.h"
38 #include "linux-timer.h"
39 
41 
43 
44 #if defined(PERFCTR26)
45 #define evntsel_aux p4.escr
46 #endif
47 
48 #if defined(PAPI_PENTIUM4_VEC_MMX)
49 #define P4_VEC "MMX"
50 #else
51 #define P4_VEC "SSE"
52 #endif
53 
54 #if defined(PAPI_PENTIUM4_FP_X87)
55 #define P4_FPU " X87"
56 #elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP)
57 #define P4_FPU " X87 SSE_SP"
58 #elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP)
59 #define P4_FPU " SSE_SP SSE_DP"
60 #else
61 #define P4_FPU " X87 SSE_DP"
62 #endif
63 
64 /* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */
65 #if defined(PAPI_OPTERON_FP_RETIRED)
66 #define AMD_FPU "RETIRED"
67 #elif defined(PAPI_OPTERON_FP_SSE_SP)
68 #define AMD_FPU "SSE_SP"
69 #elif defined(PAPI_OPTERON_FP_SSE_DP)
70 #define AMD_FPU "SSE_DP"
71 #else
72 #define AMD_FPU "SPECULATIVE"
73 #endif
74 
75 static inline int is_pentium4(void) {
76  if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) &&
77  ( _papi_hwi_system_info.hw_info.cpuid_family == 15 )) {
78  return 1;
79  }
80 
81  return 0;
82 
83 }
84 
85 #ifdef DEBUG
/* Dump an x86 register-allocation structure over the SUBDBG debug channel.
   NOTE(review): the function signature line (name and parameter list) is
   missing from this view of the file -- presumably print_alloc( a ), where
   'a' points to the register-allocation struct; confirm against the full
   source. */
static void
{
	SUBDBG( "X86_reg_alloc:\n" );
	SUBDBG( "  selector: %#x\n", a->ra_selector );
	SUBDBG( "  rank: %#x\n", a->ra_rank );
	SUBDBG( "  escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
}
94 
95 void
96 print_control( const struct perfctr_cpu_control *control )
97 {
98  unsigned int i;
99  SUBDBG( "Control used:\n" );
100  SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
101  SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
102  SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );
103 
104  for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
105  if ( control->pmc_map[i] >= 18 ) {
106  SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
107  } else {
108  SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
109  }
110  SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
111  if ( control->ireset[i] ) {
112  SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
113  }
114  }
115 }
116 #endif
117 
/* Initialize a per-eventset control state to component defaults: the
   default counting domain on every counter, the TSC enabled, and no
   active or interrupting counters.  Always returns PAPI_OK.
   NOTE(review): the signature line is missing from this view --
   presumably the init-control-state entry taking a hwd_control_state_t
   pointer named 'ptr'; confirm against the full source. */
static int
{
	int i, def_mode = 0;

	if ( is_pentium4() ) {
		/* Translate the default PAPI domain into P4 escr mode bits. */
		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
			def_mode |= ESCR_T0_USR;
		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
			def_mode |= ESCR_T0_OS;

		for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
			ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
		}
		ptr->control.cpu_control.tsc_on = 1;
		ptr->control.cpu_control.nractrs = 0;
		ptr->control.cpu_control.nrictrs = 0;

#ifdef VPERFCTR_CONTROL_CLOEXEC
		ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
		SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
	} else {

		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
			def_mode |= PERF_USR;
		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
			def_mode |= PERF_OS;

		ptr->allocated_registers.selector = 0;
		switch ( _papi_hwi_system_info.hw_info.model ) {
		case PERFCTR_X86_GENERIC:
		case PERFCTR_X86_WINCHIP_C6:
		case PERFCTR_X86_WINCHIP_2:
		case PERFCTR_X86_VIA_C3:
		case PERFCTR_X86_INTEL_P5:
		case PERFCTR_X86_INTEL_P5MMX:
		case PERFCTR_X86_INTEL_PII:
		case PERFCTR_X86_INTEL_P6:
		case PERFCTR_X86_INTEL_PIII:
#ifdef PERFCTR_X86_INTEL_CORE
		case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
		case PERFCTR_X86_INTEL_PENTM:
#endif
			/* These models set a single enable bit on counter 0 only. */
			ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
			for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
				ptr->control.cpu_control.evntsel[i] |= def_mode;
				ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
			}
			break;
#ifdef PERFCTR_X86_INTEL_CORE2
		case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM
		case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM
		case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
		case PERFCTR_X86_INTEL_WSTMR:
#endif
#ifdef PERFCTR_X86_AMD_K8
		case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
		case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10H	/* this is defined in perfctr 2.6.29 */
		case PERFCTR_X86_AMD_FAM10H:
#endif
		case PERFCTR_X86_AMD_K7:
			/* These models carry the enable bit in every counter's evntsel. */
			for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
				ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
				ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
			}
			break;
		}
#ifdef VPERFCTR_CONTROL_CLOEXEC
		ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
		SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif

		/* Make sure the TSC is always on */
		ptr->control.cpu_control.tsc_on = 1;
	}
	return ( PAPI_OK );
}
208 
209 int
210 _x86_set_domain( hwd_control_state_t * cntrl, int domain )
211 {
212  int i, did = 0;
213  int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
214 
215  /* Clear the current domain set for this event set */
216  /* We don't touch the Enable bit in this code */
217  if ( is_pentium4() ) {
218  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
219  cntrl->control.cpu_control.evntsel_aux[i] &=
220  ~( ESCR_T0_OS | ESCR_T0_USR );
221  }
222 
223  if ( domain & PAPI_DOM_USER ) {
224  did = 1;
225  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
226  cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
227  }
228  }
229 
230  if ( domain & PAPI_DOM_KERNEL ) {
231  did = 1;
232  for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
233  cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
234  }
235  }
236  } else {
237  for ( i = 0; i < num_cntrs; i++ ) {
238  cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
239  }
240 
241  if ( domain & PAPI_DOM_USER ) {
242  did = 1;
243  for ( i = 0; i < num_cntrs; i++ ) {
244  cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
245  }
246  }
247 
248  if ( domain & PAPI_DOM_KERNEL ) {
249  did = 1;
250  for ( i = 0; i < num_cntrs; i++ ) {
251  cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
252  }
253  }
254  }
255 
256  if ( !did )
257  return ( PAPI_EINVAL );
258  else
259  return ( PAPI_OK );
260 }
261 
262 /* This function examines the event to determine
263  if it can be mapped to counter ctr.
264  Returns true if it can, false if it can't. */
/* NOTE(review): the signature line is missing from this view --
   presumably the availability check taking (dst, ctr); confirm
   against the full source. */
static int
{
	/* Nonzero iff bit 'ctr' is set in the event's allowed-counter mask. */
	return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
}
270 
271 /* This function forces the event to
272  be mapped to only counter ctr.
273  Returns nothing. */
274 static void
275 _bpt_map_set( hwd_reg_alloc_t * dst, int ctr )
276 {
277  dst->ra_selector = ( unsigned int ) ( 1 << ctr );
278  dst->ra_rank = 1;
279 
280  if ( is_pentium4() ) {
281  /* Pentium 4 requires that both an escr and a counter are selected.
282  Find which counter mask contains this counter.
283  Set the opposite escr to empty (-1) */
284  if ( dst->ra_bits.counter[0] & dst->ra_selector )
285  dst->ra_escr[1] = -1;
286  else
287  dst->ra_escr[0] = -1;
288  }
289 }
290 
291 /* This function examines the event to determine
292  if it has a single exclusive mapping.
293  Returns true if exlusive, false if non-exclusive. */
/* NOTE(review): the signature line is missing from this view --
   presumably the exclusivity check taking (dst); confirm against
   the full source. */
static int
{
	/* Rank 1 means exactly one counter can host this event. */
	return ( dst->ra_rank == 1 );
}
299 
300 /* This function compares the dst and src events
301  to determine if any resources are shared. Typically the src event
302  is exclusive, so this detects a conflict if true.
303  Returns true if conflict, false if no conflict. */
/* Report whether dst and src share any allocation resources (counters,
   and on Pentium 4 also escrs and pebs registers).  Nonzero = conflict.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (dst, src); confirm against the full source. */
static int
{
	if ( is_pentium4() ) {
		int retval1, retval2;
		/* Pentium 4 needs to check for conflict of both counters and esc registers */
		/* selectors must share bits */
		retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
					/* or escrs must equal each other and not be set to -1 */
					( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
					  ( ( int ) dst->ra_escr[0] != -1 ) ) ||
					( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
					  ( ( int ) dst->ra_escr[1] != -1 ) ) );
		/* Pentium 4 also needs to check for conflict on pebs registers */
		/* pebs enables must both be non-zero */
		retval2 =
			( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
				/* and not equal to each other */
				( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
			  /* same for pebs_matrix_vert */
			  ( ( dst->ra_bits.pebs_matrix_vert &&
				  src->ra_bits.pebs_matrix_vert ) &&
				( dst->ra_bits.pebs_matrix_vert !=
				  src->ra_bits.pebs_matrix_vert ) ) );
		if ( retval2 ) {
			SUBDBG( "pebs conflict!\n" );
		}
		return ( retval1 | retval2 );
	}

	/* Non-P4: a conflict is simply an overlap of the selector bitmaps. */
	return ( int ) ( dst->ra_selector & src->ra_selector );
}
336 
337 /* This function removes shared resources available to the src event
338  from the resources available to the dst event,
339  and reduces the rank of the dst event accordingly. Typically,
340  the src event will be exclusive, but the code shouldn't assume it.
341  Returns nothing. */
/* Remove the resources claimed by src from dst's available set and
   recompute dst's rank.  On a pebs conflict the selector is cleared
   entirely, making dst unplaceable.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (dst, src); confirm against the full source. */
static void
{
	int i;
	unsigned shared;

	if ( is_pentium4() ) {
#ifdef DEBUG
		SUBDBG( "src, dst\n" );
		print_alloc( src );
		print_alloc( dst );
#endif

		/* check for a pebs conflict */
		/* pebs enables must both be non-zero */
		i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
				/* and not equal to each other */
				( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
			  /* same for pebs_matrix_vert */
			  ( ( dst->ra_bits.pebs_matrix_vert &&
				  src->ra_bits.pebs_matrix_vert )
				&& ( dst->ra_bits.pebs_matrix_vert !=
					 src->ra_bits.pebs_matrix_vert ) ) );
		if ( i ) {
			SUBDBG( "pebs conflict! clearing selector\n" );
			dst->ra_selector = 0;
			return;
		} else {
			/* remove counters referenced by any shared escrs */
			if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
				 ( ( int ) dst->ra_escr[0] != -1 ) ) {
				dst->ra_selector &= ~dst->ra_bits.counter[0];
				dst->ra_escr[0] = -1;
			}
			if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
				 ( ( int ) dst->ra_escr[1] != -1 ) ) {
				dst->ra_selector &= ~dst->ra_bits.counter[1];
				dst->ra_escr[1] = -1;
			}

			/* remove any remaining shared counters */
			shared = ( dst->ra_selector & src->ra_selector );
			if ( shared )
				dst->ra_selector ^= shared;
		}
		/* recompute rank: number of counters still available to dst */
		for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
			if ( dst->ra_selector & ( 1 << i ) )
				dst->ra_rank++;
#ifdef DEBUG
		SUBDBG( "new dst\n" );
		print_alloc( dst );
#endif
	} else {
		/* Non-P4: just drop the overlapping counters and recount the rank. */
		shared = dst->ra_selector & src->ra_selector;
		if ( shared )
			dst->ra_selector ^= shared;
		for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
			if ( dst->ra_selector & ( 1 << i ) )
				dst->ra_rank++;
	}
}
404 
/* Copy the counter selector (and, on Pentium 4, the escr choices) from
   src into dst after the allocator has settled on a mapping.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (dst, src); confirm against the full source. */
static void
{
	dst->ra_selector = src->ra_selector;

	if ( is_pentium4() ) {
		dst->ra_escr[0] = src->ra_escr[0];
		dst->ra_escr[1] = src->ra_escr[1];
	}
}
415 
416 /* Register allocation */
/* Map each native event in the eventset onto a hardware counter using
   the modified bipartite allocator, then copy the winning assignments
   back into the eventset's NativeInfo array.  Returns PAPI_OK on
   success or PAPI_ECNFLCT when no conflict-free mapping exists.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (ESI) -- and one declaration line (likely
   "hwd_register_t *ptr;", used below) is also missing; confirm
   against the full source. */
static int
{
	int i, j, natNum;
	hwd_reg_alloc_t event_list[MAX_COUNTERS];

	/* Initialize the local structure needed
	   for counter allocation and optimization. */
	natNum = ESI->NativeCount;

	if ( is_pentium4() ) {
		SUBDBG( "native event count: %d\n", natNum );
	}

	for ( i = 0; i < natNum; i++ ) {
		/* retrieve the mapping information about this native event */
		_papi_libpfm_ntv_code_to_bits( ( unsigned int ) ESI->NativeInfoArray[i].
									   ni_event, &event_list[i].ra_bits );

		if ( is_pentium4() ) {
			/* combine counter bit masks for both esc registers into selector */
			event_list[i].ra_selector =
				event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
				counter[1];
		} else {
			/* make sure register allocator only looks at legal registers */
			event_list[i].ra_selector =
				event_list[i].ra_bits.selector & ALLCNTRS;
#ifdef PERFCTR_X86_INTEL_CORE2
			/* Core2: fold the upper 16 selector bits down into the
			   legal-counter range as well. */
			if ( _papi_hwi_system_info.hw_info.model ==
				 PERFCTR_X86_INTEL_CORE2 )
				event_list[i].ra_selector |=
					( ( event_list[i].ra_bits.
						selector >> 16 ) << 2 ) & ALLCNTRS;
#endif
		}
		/* calculate native event rank, which is no. of counters it can live on */
		event_list[i].ra_rank = 0;
		for ( j = 0; j < MAX_COUNTERS; j++ ) {
			if ( event_list[i].ra_selector & ( 1 << j ) ) {
				event_list[i].ra_rank++;
			}
		}

		if ( is_pentium4() ) {
			event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
			event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
#ifdef DEBUG
			SUBDBG( "i: %d\n", i );
			print_alloc( &event_list[i] );
#endif
		}
	}
	if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) {	/* successfully mapped */
		for ( i = 0; i < natNum; i++ ) {
#ifdef PERFCTR_X86_INTEL_CORE2
			if ( _papi_hwi_system_info.hw_info.model ==
				 PERFCTR_X86_INTEL_CORE2 )
				event_list[i].ra_bits.selector = event_list[i].ra_selector;
#endif
#ifdef DEBUG
			if ( is_pentium4() ) {
				SUBDBG( "i: %d\n", i );
				print_alloc( &event_list[i] );
			}
#endif
			/* Copy all info about this native event to the NativeInfo struct */
			ptr = ESI->NativeInfoArray[i].ni_bits;
			*ptr = event_list[i].ra_bits;

			if ( is_pentium4() ) {
				/* The selector contains the counter bit position. Turn it into a number
				   and store it in the first counter value, zeroing the second. */
				ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
				ptr->counter[1] = 0;
			}

			/* Array order on perfctr is event ADD order, not counter #... */
			ESI->NativeInfoArray[i].ni_position = i;
		}
		return PAPI_OK;
	} else
		return PAPI_ECNFLCT;
}
502 
/* Zero out every per-counter control entry in this eventset's control
   state, keeping only the domain (and, non-P4, enable) mode bits, and
   reset the active/interrupting counter counts to zero.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (this_state); confirm against the full source. */
static void
{
	unsigned int i, j;

	/* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */
	j = this_state->control.cpu_control.nractrs +
		this_state->control.cpu_control.nrictrs;

	/* Remove all counter control command values from eventset. */
	for ( i = 0; i < j; i++ ) {
		SUBDBG( "Clearing pmc event entry %d\n", i );
		if ( is_pentium4() ) {
			this_state->control.cpu_control.pmc_map[i] = 0;
			this_state->control.cpu_control.evntsel[i] = 0;
			/* Preserve only the domain bits in the escr value. */
			this_state->control.cpu_control.evntsel_aux[i] =
				this_state->control.cpu_control.
				evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR );
		} else {
			this_state->control.cpu_control.pmc_map[i] = i;
			/* Preserve only the enable and domain bits. */
			this_state->control.cpu_control.evntsel[i]
				= this_state->control.cpu_control.
				evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR );
		}
		this_state->control.cpu_control.ireset[i] = 0;
	}

	if ( is_pentium4() ) {
		/* Clear pebs stuff */
		this_state->control.cpu_control.p4.pebs_enable = 0;
		this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
	}

	/* clear both a and i counter counts */
	this_state->control.cpu_control.nractrs = 0;
	this_state->control.cpu_control.nrictrs = 0;

#ifdef DEBUG
	if ( is_pentium4() )
		print_control( &this_state->control.cpu_control );
#endif
}
545 
546 /* This function clears the current contents of the control structure and
547  updates it with whatever resources are allocated for all the native events
548  in the native info structure array. */
/* Clear the current contents of the control structure and rebuild it
   from the resources allocated to the native events in 'native'.
   Returns PAPI_OK, or PAPI_ECNFLCT when two replay events need
   conflicting pebs register values.
   NOTE(review): the first signature line (the function name and the
   this_state parameter) is missing from this view; confirm against
   the full source. */
static int
				NativeInfo_t * native, int count,
				hwd_context_t * ctx )
{
	( void ) ctx;			 /*unused */
	unsigned int i, k, retval = PAPI_OK;
	hwd_register_t *bits,*bits2;
	struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;

	/* clear out the events from the control state */
	clear_cs_events( this_state );

	if ( is_pentium4() ) {
		/* fill the counters we're using */
		for ( i = 0; i < ( unsigned int ) count; i++ ) {
			/* dereference the mapping information about this native event */
			bits = native[i].ni_bits;

			/* Add counter control command values to eventset */
			cpu_control->pmc_map[i] = bits->counter[0];
			cpu_control->evntsel[i] = bits->cccr;
			cpu_control->ireset[i] = bits->ireset;
			cpu_control->pmc_map[i] |= FAST_RDPMC;
			cpu_control->evntsel_aux[i] |= bits->event;

			/* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
			   Replay_events count L1 and L2 cache events. There is only one of each for
			   the entire eventset. Therefore, there can be only one unique replay_event
			   per eventset. This means L1 and L2 can't be counted together. Which stinks.
			   This conflict should be trapped in the allocation scheme, but we'll test for it
			   here too, just in case. */
			if ( bits->pebs_enable ) {
				/* if pebs_enable isn't set, just copy */
				if ( cpu_control->p4.pebs_enable == 0 ) {
					cpu_control->p4.pebs_enable = bits->pebs_enable;
					/* if pebs_enable conflicts, flag an error */
				} else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
					SUBDBG
						( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
					retval = PAPI_ECNFLCT;
				}
				/* if pebs_enable == bits->pebs_enable, do nothing */
			}
			if ( bits->pebs_matrix_vert ) {
				/* if pebs_matrix_vert isn't set, just copy */
				if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
					cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
					/* if pebs_matrix_vert conflicts, flag an error */
				} else if ( cpu_control->p4.pebs_matrix_vert !=
							bits->pebs_matrix_vert ) {
					SUBDBG
						( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
					retval = PAPI_ECNFLCT;
				}
				/* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
			}
		}
		this_state->control.cpu_control.nractrs = count;

		/* Make sure the TSC is always on */
		this_state->control.cpu_control.tsc_on = 1;

#ifdef DEBUG
		print_control( &this_state->control.cpu_control );
#endif
	} else {
		switch ( _papi_hwi_system_info.hw_info.model ) {
#ifdef PERFCTR_X86_INTEL_CORE2
		case PERFCTR_X86_INTEL_CORE2:
			/* fill the counters we're using */
			for ( i = 0; i < ( unsigned int ) count; i++ ) {
				bits2 = native[i].ni_bits;
				/* Find the lowest selector bit; bits above 1 get mapped
				   with the 0x40000000 flag -- presumably Core2's
				   fixed-function counters; confirm against perfctr docs. */
				for ( k = 0; k < MAX_COUNTERS; k++ )
					if ( bits2->selector & ( 1 << k ) ) {
						break;
					}
				if ( k > 1 )
					this_state->control.cpu_control.pmc_map[i] =
						( k - 2 ) | 0x40000000;
				else
					this_state->control.cpu_control.pmc_map[i] = k;

				/* Add counter control command values to eventset */
				this_state->control.cpu_control.evntsel[i] |=
					bits2->counter_cmd;
			}
			break;
#endif
		default:
			/* fill the counters we're using */
			for ( i = 0; i < ( unsigned int ) count; i++ ) {
				/* Add counter control command values to eventset */
				bits2 = native[i].ni_bits;
				this_state->control.cpu_control.evntsel[i] |=
					bits2->counter_cmd;
			}
		}
		this_state->control.cpu_control.nractrs = ( unsigned int ) count;
	}
	return retval;
}
651 
/* Hand the prepared control block to the perfctr kernel interface to
   begin counting, using the rvperfctr handle when one exists.  Returns
   PAPI_OK, or PAPI_ESYS when the perfctr call fails.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (ctx, state) -- and two lines (likely PAPIERROR
   calls) are missing in the error paths below; confirm against the
   full source. */
static int
{
	int error;
#ifdef DEBUG
	print_control( &state->control.cpu_control );
#endif

	if ( state->rvperfctr != NULL ) {
		if ( ( error =
			   rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
			SUBDBG( "rvperfctr_control returns: %d\n", error );
			return ( PAPI_ESYS );
		}
		return ( PAPI_OK );
	}

	if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) {
		SUBDBG( "vperfctr_control returns: %d\n", error );
		return ( PAPI_ESYS );
	}
	return ( PAPI_OK );
}
677 
/* Stop counting via the perfctr kernel interface.  Returns PAPI_OK, or
   PAPI_ESYS when the perfctr call fails.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (ctx, state) -- and two lines (likely PAPIERROR
   calls) are missing in the error paths below.  Also note the rvperfctr
   branch checks state->rvperfctr but stops via a cast of ctx->perfctr;
   verify this is intentional against the full source. */
static int
{
	int error;

	if ( state->rvperfctr != NULL ) {
		if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) {
			return ( PAPI_ESYS );
		}
		return ( PAPI_OK );
	}

	error = vperfctr_stop( ctx->perfctr );
	if ( error < 0 ) {
		SUBDBG( "vperfctr_stop returns: %d\n", error );
		return ( PAPI_ESYS );
	}
	return ( PAPI_OK );
}
699 
700 static int
701 _x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp,
702  int flags )
703 {
704  if ( flags & PAPI_PAUSED ) {
705  vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
706  if ( !is_pentium4() ) {
707  unsigned int i = 0;
708  for ( i = 0;
709  i <
710  spc->control.cpu_control.nractrs +
711  spc->control.cpu_control.nrictrs; i++ ) {
712  SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i,
713  spc->state.pmc[i] );
714  }
715  }
716  } else {
717  SUBDBG( "vperfctr_read_ctrs\n" );
718  if ( spc->rvperfctr != NULL ) {
719  rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
720  } else {
721  vperfctr_read_ctrs( ctx->perfctr, &spc->state );
722  }
723  }
724  *dp = ( long long * ) spc->state.pmc;
725 #ifdef DEBUG
726  {
727  if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
728  unsigned int i;
729  if ( is_pentium4() ) {
730  for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
731  SUBDBG( "raw val hardware index %d is %lld\n", i,
732  ( long long ) spc->state.pmc[i] );
733  }
734  } else {
735  for ( i = 0;
736  i <
737  spc->control.cpu_control.nractrs +
738  spc->control.cpu_control.nrictrs; i++ ) {
739  SUBDBG( "raw val hardware index %d is %lld\n", i,
740  ( long long ) spc->state.pmc[i] );
741  }
742  }
743  }
744  }
745 #endif
746  return ( PAPI_OK );
747 }
748 
/* Reset simply restarts counting by re-issuing the start command.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (ctx, cntrl); confirm against the full source. */
static int
{
	return ( _x86_start( ctx, cntrl ) );
}
754 
755 /* Perfctr requires that interrupting counters appear at the end of the pmc list
756  In the case a user wants to interrupt on a counter in an evntset that is not
757  among the last events, we need to move the perfctr virtual events around to
758  make it last. This function swaps two perfctr events, and then adjust the
759  position entries in both the NativeInfoArray and the EventInfoArray to keep
760  everything consistent. */
761 static void
762 swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1,
763  int cntr2 )
764 {
765  unsigned int ui;
766  int si, i, j;
767 
768  for ( i = 0; i < ESI->NativeCount; i++ ) {
769  if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
770  ESI->NativeInfoArray[i].ni_position = cntr2;
771  else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
772  ESI->NativeInfoArray[i].ni_position = cntr1;
773  }
774 
775  for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
776  for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
777  if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
778  ESI->EventInfoArray[i].pos[j] = cntr2;
779  else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
780  ESI->EventInfoArray[i].pos[j] = cntr1;
781  }
782  }
783 
784  ui = contr->cpu_control.pmc_map[cntr1];
785  contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
786  contr->cpu_control.pmc_map[cntr2] = ui;
787 
788  ui = contr->cpu_control.evntsel[cntr1];
789  contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
790  contr->cpu_control.evntsel[cntr2] = ui;
791 
792  if ( is_pentium4() ) {
793  ui = contr->cpu_control.evntsel_aux[cntr1];
794  contr->cpu_control.evntsel_aux[cntr1] =
795  contr->cpu_control.evntsel_aux[cntr2];
796  contr->cpu_control.evntsel_aux[cntr2] = ui;
797  }
798 
799  si = contr->cpu_control.ireset[cntr1];
800  contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
801  contr->cpu_control.ireset[cntr2] = si;
802 }
803 
/* Enable (threshold != 0) or disable (threshold == 0) hardware overflow
   signaling for the event at EventIndex.  Enabling records the signal
   number in contr->si_signo, arms the counter's interrupt reset value,
   and moves the counter into the interrupting region of the pmc list;
   disabling reverses all of that.  Returns PAPI_OK, PAPI_EINVAL for a
   bad counter index, or an error from signal setup. */
static int
_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
{
	hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
	struct hwd_pmc_control *contr = &(ctl->control);
	int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
	OVFDBG( "EventIndex=%d\n", EventIndex );

#ifdef DEBUG
	if ( is_pentium4() )
		print_control( &(contr->cpu_control) );
#endif

	/* The correct event to overflow is EventIndex */
	ncntrs = _perfctr_vector.cmp_info.num_cntrs;
	i = ESI->EventInfoArray[EventIndex].pos[0];

	if ( i >= ncntrs ) {
		PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
		return PAPI_EINVAL;
	}

	if ( threshold != 0 ) {	/* Set an overflow threshold */
		/* NOTE(review): a line is missing from this view here -- it
		   presumably begins the signal-installation call whose trailing
		   arguments follow; confirm against the full source. */
			NEED_CONTEXT,
			_perfctr_vector.cmp_info.CmpIdx );
		if ( retval != PAPI_OK )
			return ( retval );

		/* overflow interrupt occurs on the NEXT event after overflow occurs
		   thus we subtract 1 from the threshold. */
		contr->cpu_control.ireset[i] = ( -threshold + 1 );

		if ( is_pentium4() )
			contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
		else
			contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;

		contr->cpu_control.nrictrs++;
		contr->cpu_control.nractrs--;
		nricntrs = ( int ) contr->cpu_control.nrictrs;
		nracntrs = ( int ) contr->cpu_control.nractrs;
		contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;

		/* move this event to the bottom part of the list if needed */
		if ( i < nracntrs )
			swap_events( ESI, contr, i, nracntrs );
		OVFDBG( "Modified event set\n" );
	} else {
		/* Disabling: only undo the interrupt bits if they were set. */
		if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
			contr->cpu_control.ireset[i] = 0;
			contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
			contr->cpu_control.nrictrs--;
			contr->cpu_control.nractrs++;
		} else if ( !is_pentium4() &&
					contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
			contr->cpu_control.ireset[i] = 0;
			contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
			contr->cpu_control.nrictrs--;
			contr->cpu_control.nractrs++;
		}

		nricntrs = ( int ) contr->cpu_control.nrictrs;
		nracntrs = ( int ) contr->cpu_control.nractrs;

		/* move this event to the top part of the list if needed */
		if ( i >= nracntrs )
			swap_events( ESI, contr, i, nracntrs - 1 );

		if ( !nricntrs )
			contr->si_signo = 0;

		OVFDBG( "Modified event set\n" );

		/* NOTE(review): one more line is missing from this view here
		   (original line 878); confirm against the full source. */
	}

#ifdef DEBUG
	if ( is_pentium4() )
		print_control( &(contr->cpu_control) );
#endif
	OVFDBG( "End of call. Exit code: %d\n", retval );
	return ( retval );
}
888 
/* Profiling teardown hook: nothing to do for this component, so both
   arguments are deliberately unused and PAPI_OK is returned.
   NOTE(review): the signature line is missing from this view --
   presumably it takes (master, ESI); confirm against the full source. */
static int
{
	( void ) master;		 /*unused */
	( void ) ESI;			 /*unused */
	return ( PAPI_OK );
}
896 
897 
898 
899 /* these define cccr and escr register bits, and the p4 event structure */
900 #include "perfmon/pfmlib_pentium4.h"
901 #include "../lib/pfmlib_pentium4_priv.h"
902 
903 #define P4_REPLAY_REAL_MASK 0x00000003
904 
905 extern pentium4_escr_reg_t pentium4_escrs[];
906 extern pentium4_cccr_reg_t pentium4_cccrs[];
907 extern pentium4_event_t pentium4_events[];
908 
909 
/* Per-replay-event register settings for Pentium 4 replay counting.
   Each entry pairs an .enb value with a .mat_vert value -- presumably
   loaded into the shared pebs_enable / pebs_matrix_vert control fields
   used elsewhere in this file; confirm against the Intel P4 manuals.
   Entries 0 and 1 are unused placeholders. */
static pentium4_replay_regs_t p4_replay_regs[] = {
	/* 0 */ {.enb = 0,
			 /* dummy */
			 .mat_vert = 0,
			 },
	/* 1 */ {.enb = 0,
			 /* dummy */
			 .mat_vert = 0,
			 },
	/* 2 */ {.enb = 0x01000001,
			 /* 1stL_cache_load_miss_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 3 */ {.enb = 0x01000002,
			 /* 2ndL_cache_load_miss_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 4 */ {.enb = 0x01000004,
			 /* DTLB_load_miss_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 5 */ {.enb = 0x01000004,
			 /* DTLB_store_miss_retired */
			 .mat_vert = 0x00000002,
			 },
	/* 6 */ {.enb = 0x01000004,
			 /* DTLB_all_miss_retired */
			 .mat_vert = 0x00000003,
			 },
	/* 7 */ {.enb = 0x01018001,
			 /* Tagged_mispred_branch */
			 .mat_vert = 0x00000010,
			 },
	/* 8 */ {.enb = 0x01000200,
			 /* MOB_load_replay_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 9 */ {.enb = 0x01000400,
			 /* split_load_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 10 */ {.enb = 0x01000400,
			  /* split_store_retired */
			  .mat_vert = 0x00000002,
			  },
};
956 
/* Maps the arbitrary pmd index used in libpfm's pentium4_events.h onto
   the counter numbering used in the Intel documentation. */
static int pfm2intel[] =
	{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 };
960 
961 
962 
963 
964 /* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */
965 /* Also, libpfm assumes events can live on different counters with different codes. This call only returns
966  the first occurence found. */
967 /* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be
968  generally useful it should be fixed. - dkt */
969 static int
970 _pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code )
971 {
972  pfmlib_regmask_t cnt, impl;
973  unsigned int num;
974  unsigned int i, first = 1;
975  int ret;
976 
977  if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
978  PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt,
979  pfm_strerror( ret ) );
980  return PAPI_ESYS;
981  }
982  if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
983  PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) );
984  return PAPI_ESYS;
985  }
986  if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
987  PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl,
988  pfm_strerror( ret ) );
989  return PAPI_ESYS;
990  }
991 
992  *selector = 0;
993  for ( i = 0; num; i++ ) {
994  if ( pfm_regmask_isset( &impl, i ) )
995  num--;
996  if ( pfm_regmask_isset( &cnt, i ) ) {
997  if ( first ) {
998  if ( ( ret =
999  pfm_get_event_code_counter( event, i,
1000  code ) ) !=
1001  PFMLIB_SUCCESS ) {
1002  PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s",
1003  event, i, code, pfm_strerror( ret ) );
1004  return PAPI_ESYS;
1005  }
1006  first = 0;
1007  }
1008  *selector |= 1 << i;
1009  }
1010  }
1011  return PAPI_OK;
1012 }
1013 
/* Translate a PAPI/libpfm native event code into this component's
 * hardware register description (X86_register_t).
 *
 * On Pentium 4, fills in the ESCR/CCCR register images, the bitmask of
 * Intel counters each allowed ESCR can drive, and the PEBS fields
 * (non-zero only for "replay_event").  On all other x86 CPUs, fills in
 * the counter selector bitmask and the perfctr event-select command
 * word (event code | umask << 8).
 *
 * EventCode: PAPI native event code to decode.
 * newbits:   output register description; cast internally to
 *            X86_register_t (hwd_register_t is the framework's opaque
 *            alias for it -- presumably the same type here; see the
 *            cast in the final SUBDBG).
 *
 * Returns PAPI_OK on success, PAPI_ENOEVNT if the event code cannot be
 * decoded, or an error propagated from _pfm_get_counter_info().
 */
int
_papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode,
				       hwd_register_t *newbits )
{
	unsigned int event, umask;

	X86_register_t *bits = (X86_register_t *)newbits;

	if ( is_pentium4() ) {
		pentium4_escr_value_t escr_value;
		pentium4_cccr_value_t cccr_value;
		unsigned int num_masks, replay_mask, unit_masks[12];
		unsigned int event_mask;
		unsigned int tag_value, tag_enable;
		unsigned int i;
		int j, escr, cccr, pmd;

		/* Split the native code into event index and unit-mask bits. */
		if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
			return PAPI_ENOEVNT;

		/* for each allowed escr (1 or 2) find the allowed cccrs.
		   for each allowed cccr find the pmd index
		   convert to an intel counter number; or it into bits->counter */
		for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
			bits->counter[i] = 0;
			escr = pentium4_events[event].allowed_escrs[i];
			if ( escr < 0 ) {
				/* unused ESCR slot; leave counter[i] == 0 */
				continue;
			}

			bits->escr[i] = escr;

			for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
				cccr = pentium4_escrs[escr].allowed_cccrs[j];
				if ( cccr < 0 ) {
					continue;
				}

				/* map the CCCR's pfm counter (pmd) to the Intel
				   counter number and record it as usable */
				pmd = pentium4_cccrs[cccr].pmd;
				bits->counter[i] |= ( 1 << pfm2intel[pmd] );
			}
		}

		/* if there's only one valid escr, copy the values */
		/* NOTE(review): this relies on 'escr' retaining the value from
		   the FINAL loop iteration above: it is negative here exactly
		   when the event's last ESCR slot was unused. */
		if ( escr < 0 ) {
			bits->escr[1] = bits->escr[0];
			bits->counter[1] = bits->counter[0];
		}

		/* Calculate the event-mask value. Invalid masks
		 * specified by the caller are ignored. */
		tag_value = 0;
		tag_enable = 0;
		event_mask = _pfm_convert_umask( event, umask );

		/* Bits above EVENT_MASK_BITS carry a tag value rather than
		   unit-mask bits; extract them if present. */
		if ( event_mask & 0xF0000 ) {
			tag_enable = 1;
			tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
		}

		event_mask &= 0x0FFFF;	/* mask off possible tag bits */

		/* Set up the ESCR and CCCR register values. */
		escr_value.val = 0;
		escr_value.bits.t1_usr = 0;	/* controlled by kernel */
		escr_value.bits.t1_os = 0;	/* controlled by kernel */
//		escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0;
//		escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0;
		escr_value.bits.tag_enable = tag_enable;
		escr_value.bits.tag_value = tag_value;
		escr_value.bits.event_mask = event_mask;
		escr_value.bits.event_select = pentium4_events[event].event_select;
		escr_value.bits.reserved = 0;

		/* initialize the proper bits in the cccr register */
		cccr_value.val = 0;
		cccr_value.bits.reserved1 = 0;
		cccr_value.bits.enable = 1;
		cccr_value.bits.escr_select = pentium4_events[event].escr_select;
		cccr_value.bits.active_thread = 3;
		/* FIXME: This is set to count when either logical
		 *        CPU is active. Need a way to distinguish
		 *        between logical CPUs when HT is enabled.
		 *        the docs say these bits should always
		 *        be set. */
		cccr_value.bits.compare = 0;
		/* FIXME: What do we do with "threshold" settings? */
		cccr_value.bits.complement = 0;
		/* FIXME: What do we do with "threshold" settings? */
		cccr_value.bits.threshold = 0;
		/* FIXME: What do we do with "threshold" settings? */
		cccr_value.bits.force_ovf = 0;
		/* FIXME: Do we want to allow "forcing" overflow
		 *        interrupts on all counter increments? */
		cccr_value.bits.ovf_pmi_t0 = 0;
		cccr_value.bits.ovf_pmi_t1 = 0;
		/* PMI taken care of by kernel typically */
		cccr_value.bits.reserved2 = 0;
		cccr_value.bits.cascade = 0;
		/* FIXME: How do we handle "cascading" counters? */
		cccr_value.bits.overflow = 0;

		/* these flags are always zero, from what I can tell... */
		bits->pebs_enable = 0;	/* flag for PEBS counting */
		bits->pebs_matrix_vert = 0;
		/* flag for PEBS_MATRIX_VERT, whatever that is */

		/* ...unless the event is replay_event */
		if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
			escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
			num_masks = prepare_umask( umask, unit_masks );
			for ( i = 0; i < num_masks; i++ ) {
				replay_mask = unit_masks[i];
				/* masks 2..10 map to PEBS enable / matrix-vert
				   bits via p4_replay_regs; others are ignored */
				if ( replay_mask > 1 && replay_mask < 11 ) {
					/* process each valid mask we find */
					bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
					bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
				}
			}
		}

		/* store the escr and cccr values */
		bits->event = escr_value.val;
		bits->cccr = cccr_value.val;
		bits->ireset = 0;	/* I don't really know what this does */
		SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val );
	} else {

		/* Non-Pentium4 path: classic evntsel-style counters. */
		int ret, code;

		if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
			return PAPI_ENOEVNT;

		/* bits->selector = bitmask of counters that can host this event;
		   code = the raw event-select value for those counters */
		if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
						    &code ) ) != PAPI_OK )
			return ret;

		/* evntsel layout: event code in the low byte, unit mask in
		   bits 8-15 */
		bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );

		SUBDBG( "selector: %#x\n", bits->selector );
		SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
				umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
	}

	return PAPI_OK;
}
1160 
1161 
1162 
1163 papi_vector_t _perfctr_vector = {
1164  .cmp_info = {
1165  /* default component information (unspecified values are initialized to 0) */
1166  .name = "perfctr",
1167  .description = "Linux perfctr CPU counters",
1168  .default_domain = PAPI_DOM_USER,
1169  .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
1170  .default_granularity = PAPI_GRN_THR,
1171  .available_granularities = PAPI_GRN_THR,
1172  .hardware_intr_sig = PAPI_INT_SIGNAL,
1173 
1174  /* component specific cmp_info initializations */
1175  .fast_real_timer = 1,
1176  .fast_virtual_timer = 1,
1177  .attach = 1,
1178  .attach_must_ptrace = 1,
1179  .cntr_umasks = 1,
1180  }
1181  ,
1182 
1183  /* sizes of framework-opaque component-private structures */
1184  .size = {
1185  .context = sizeof ( X86_perfctr_context_t ),
1186  .control_state = sizeof ( X86_perfctr_control_t ),
1187  .reg_value = sizeof ( X86_register_t ),
1188  .reg_alloc = sizeof ( X86_reg_alloc_t ),
1189  }
1190  ,
1191 
1192  /* function pointers in this component */
1193  .init_control_state = _x86_init_control_state,
1194  .start = _x86_start,
1195  .stop = _x86_stop,
1196  .read = _x86_read,
1197  .allocate_registers = _x86_allocate_registers,
1198  .update_control_state = _x86_update_control_state,
1199  .set_domain = _x86_set_domain,
1200  .reset = _x86_reset,
1201  .set_overflow = _x86_set_overflow,
1202  .stop_profiling = _x86_stop_profiling,
1203 
1204  .init_component = _perfctr_init_component,
1205  .ctl = _perfctr_ctl,
1206  .dispatch_timer = _perfctr_dispatch_timer,
1207  .init_thread = _perfctr_init_thread,
1208  .shutdown_thread = _perfctr_shutdown_thread,
1209 
1210  /* from libpfm */
1211  .ntv_enum_events = _papi_libpfm_ntv_enum_events,
1212  .ntv_name_to_code = _papi_libpfm_ntv_name_to_code,
1213  .ntv_code_to_name = _papi_libpfm_ntv_code_to_name,
1214  .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr,
1215  .ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits_perfctr,
1216 
1217 };
1218 
1219 
#define hwd_pmc_control
Definition: perfctr-x86.h:11
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:626
#define PAPI_ENOEVNT
Definition: papi.h:258
unsigned counter[2]
Definition: perfctr-x86.h:77
static int prepare_umask(unsigned int foo, unsigned int *values)
static void print_alloc(X86_reg_alloc_t *a)
Definition: perfctr-x86.c:87
static int _papi_bipartite_alloc(hwd_reg_alloc_t *event_list, int count, int cidx)
hwd_register_t * ni_bits
pentium4_escr_reg_t pentium4_escrs[]
long long flags
Definition: iozone.c:12330
pentium4_cccr_reg_t pentium4_cccrs[]
static int _bpt_map_avail(hwd_reg_alloc_t *dst, int ctr)
Definition: perfctr-x86.c:266
void _perfctr_dispatch_timer(int signal, hwd_siginfo_t *si, void *context)
#define PAPI_DOM_KERNEL
Definition: papi.h:298
int _papi_libpfm_ntv_enum_events(unsigned int *EventCode, int modifier)
static int _x86_init_control_state(hwd_control_state_t *ptr)
Definition: perfctr-x86.c:119
#define MAX_COUNTERS
Definition: perfctr-x86.h:8
device[deviceId] domain[domainId] event
Definition: linux-cuda.c:306
int _perfctr_shutdown_thread(hwd_context_t *ctx)
Definition: perfctr.c:428
#define PERF_USR
Definition: perfctr-x86.h:57
int _perfctr_init_thread(hwd_context_t *ctx)
Definition: perfctr.c:380
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
int event[MAX_COUNTERS]
Definition: solaris-ultra.h:47
static int is_pentium4(void)
Definition: perfctr-x86.c:75
static void _bpt_map_preempt(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
Definition: perfctr-x86.c:343
static int _x86_read(hwd_context_t *ctx, hwd_control_state_t *spc, long long **dp, int flags)
Definition: perfctr-x86.c:701
#define PAPI_DOM_USER
Definition: papi.h:296
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define ESCR_T0_OS
Definition: perfctr-x86.h:16
#define P4_REPLAY_REAL_MASK
Definition: perfctr-x86.c:903
int _papi_libpfm_ntv_code_to_bits_perfctr(unsigned int EventCode, hwd_register_t *newbits)
Definition: perfctr-x86.c:1015
static void clear_cs_events(hwd_control_state_t *this_state)
Definition: perfctr-x86.c:504
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
Return codes and api definitions.
static int _bpt_map_shared(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
Definition: perfctr-x86.c:305
static int _bpt_map_exclusive(hwd_reg_alloc_t *dst)
Definition: perfctr-x86.c:295
static int _x86_stop(hwd_context_t *ctx, hwd_control_state_t *state)
Definition: perfctr-x86.c:679
long long ret
Definition: iozone.c:1346
static int _x86_set_overflow(EventSetInfo_t *ESI, int EventIndex, int threshold)
Definition: perfctr-x86.c:805
static int _pfm_get_counter_info(unsigned int event, unsigned int *selector, int *code)
Definition: perfctr-x86.c:970
static int _x86_update_control_state(hwd_control_state_t *this_state, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: perfctr-x86.c:550
int _papi_libpfm_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len)
unsigned ra_selector
Definition: perfctr-x86.h:89
static int _x86_reset(hwd_context_t *ctx, hwd_control_state_t *cntrl)
Definition: perfctr-x86.c:750
int i
Definition: fileop.c:140
unsigned int selector
Definition: perfctr-x86.h:74
unsigned pebs_enable
Definition: perfctr-x86.h:81
static int _x86_stop_profiling(ThreadInfo_t *master, EventSetInfo_t *ESI)
Definition: perfctr-x86.c:890
static int pfm2intel[]
Definition: perfctr-x86.c:958
int k
Definition: iozone.c:19136
#define OVFDBG(format, args...)
Definition: papi_debug.h:68
unsigned int _pfm_convert_umask(unsigned int event, unsigned int umask)
void int num
Definition: iozone.c:22151
#define PAPI_ESYS
Definition: papi.h:253
static int native
Definition: event_info.c:39
unsigned escr[2]
Definition: perfctr-x86.h:78
static void _bpt_map_set(hwd_reg_alloc_t *dst, int ctr)
Definition: perfctr-x86.c:275
unsigned cccr
Definition: perfctr-x86.h:79
#define PERF_OS
Definition: perfctr-x86.h:56
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _perfctr_vector
Definition: perfctr-x86.c:1163
#define PAPI_ECNFLCT
Definition: papi.h:259
void PAPIERROR(char *format,...)
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:401
int _perfctr_init_component(int)
Definition: perfctr.c:107
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:53
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:441
#define PAPI_GRN_THR
Definition: papi.h:360
static int _x86_start(hwd_context_t *ctx, hwd_control_state_t *state)
Definition: perfctr-x86.c:653
NativeInfo_t * NativeInfoArray
EventInfo_t * EventInfoArray
int cpuid_family
Definition: papi.h:789
int threshold
int _papi_libpfm_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len)
int _x86_set_domain(hwd_control_state_t *cntrl, int domain)
Definition: perfctr-x86.c:210
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:57
PAPI_hw_info_t hw_info
#define PAPI_VENDOR_INTEL
Definition: papi.h:346
#define ALLCNTRS
Definition: perfctr-x86.h:35
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
#define PERF_INT_ENABLE
Definition: perfctr-x86.h:53
int vendor
Definition: papi.h:784
static void _bpt_map_update(hwd_reg_alloc_t *dst, hwd_reg_alloc_t *src)
Definition: perfctr-x86.c:406
#define RCNTRL_ERROR
Definition: perfctr-x86.h:66
int model
Definition: papi.h:786
char * name
Definition: iozone.c:23648
static pentium4_replay_regs_t p4_replay_regs[]
Definition: perfctr-x86.c:910
#define NEED_CONTEXT
Definition: papi_internal.h:97
unsigned ra_rank
Definition: perfctr-x86.h:90
int
Definition: iozone.c:18528
child_idents[x-1] state
Definition: iozone.c:21341
unsigned pebs_matrix_vert
Definition: perfctr-x86.h:82
unsigned ireset
Definition: perfctr-x86.h:83
#define ISLEVEL(a)
Definition: papi_debug.h:54
int _papi_libpfm_ntv_name_to_code(char *name, unsigned int *event_code)
#define ESCR_T0_USR
Definition: perfctr-x86.h:17
int _papi_libpfm_ntv_code_to_bits(unsigned int EventCode, hwd_register_t *bits)
#define FAST_RDPMC
Definition: perfctr-x86.h:19
void print_control(const struct perfctr_cpu_control *control)
Definition: perfctr-x86.c:96
int _perfctr_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: perfctr.c:289
pentium4_event_t pentium4_events[]
#define PAPI_PAUSED
Definition: papi.h:374
hwd_control_state_t * ctl_state
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
#define PERF_ENABLE
Definition: perfctr-x86.h:52
unsigned event
Definition: perfctr-x86.h:80
static int _x86_allocate_registers(EventSetInfo_t *ESI)
Definition: perfctr-x86.c:418
signal(SIGINT, signal_handler)
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")
unsigned ra_escr[2]
Definition: perfctr-x86.h:92
static int _pfm_decode_native_event(unsigned int EventCode, unsigned int *event, unsigned int *umask)
#define CCCR_OVF_PMI_T0
Definition: perfctr-x86.h:18
static void swap_events(EventSetInfo_t *ESI, struct hwd_pmc_control *contr, int cntr1, int cntr2)
Definition: perfctr-x86.c:762
#define VCNTRL_ERROR
Definition: perfctr-x86.h:65
#define DEBUG_SUBSTRATE
Definition: papi_debug.h:27
char * ptr
Definition: iozone.c:23586