PAPI  5.3.2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
extras.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
5 /*
6 * File: extras.c
7 * Author: Philip Mucci
8 * mucci@cs.utk.edu
9 * Mods: dan terpstra
10 * terpstra@cs.utk.edu
11 * Mods: Haihang You
12 * you@cs.utk.edu
13 * Mods: Kevin London
14 * london@cs.utk.edu
15 * Mods: Maynard Johnson
16 * maynardj@us.ibm.com
17 */
18 
19 /* This file contains portable routines to do things that we wish the
20 vendors did in the kernel extensions or performance libraries. */
21 
22 #include "papi.h"
23 #include "papi_internal.h"
24 #include "papi_vector.h"
25 #include "papi_memory.h"
26 #include "extras.h"
27 #include "threads.h"
28 
29 #if (!defined(HAVE_FFSLL) || defined(__bgp__))
30 int ffsll( long long lli );
31 #endif
32 
33 /****************/
34 /* BEGIN LOCALS */
35 /****************/
36 
37 static unsigned int _rnum = DEADBEEF;
38 
39 /**************/
40 /* END LOCALS */
41 /**************/
42 
43 inline_static unsigned short
45 {
46  return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 );
47 }
48 
49 
50 /* compute the amount by which to increment the bucket.
51  value is the current value of the bucket
52  this routine is used by all three profiling cases
53  it is inlined for speed
54 */
55 inline_static int
57  int flags, long long excess, long long threshold )
58 {
59  int increment = 1;
60 
61  if ( flags == PAPI_PROFIL_POSIX ) {
62  return ( 1 );
63  }
64 
65  if ( flags & PAPI_PROFIL_RANDOM ) {
66  if ( random_ushort( ) <= ( USHRT_MAX / 4 ) )
67  return ( 0 );
68  }
69 
70  if ( flags & PAPI_PROFIL_COMPRESS ) {
71  /* We're likely to ignore the sample if buf[address] gets big. */
72  if ( random_ushort( ) < value ) {
73  return ( 0 );
74  }
75  }
76 
77  if ( flags & PAPI_PROFIL_WEIGHTED ) { /* Increment is between 1 and 255 */
78  if ( excess <= ( long long ) 1 )
79  increment = 1;
80  else if ( excess > threshold )
81  increment = 255;
82  else {
83  threshold = threshold / ( long long ) 255;
84  increment = ( int ) ( excess / threshold );
85  }
86  }
87  return ( increment );
88 }
89 
90 
91 static void
92 posix_profil( caddr_t address, PAPI_sprofil_t * prof,
93  int flags, long long excess, long long threshold )
94 {
95  unsigned short *buf16;
96  unsigned int *buf32;
97  unsigned long long *buf64;
98  unsigned long indx;
99  unsigned long long lloffset;
100 
101  /* SPECIAL CASE: if starting address is 0 and scale factor is 2
102  then all counts go into first bin.
103  */
104  if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) )
105  indx = 0;
106  else {
107  /* compute the profile buffer offset by:
108  - subtracting the profiling base address from the pc address
109  - multiplying by the scaling factor
110  - dividing by max scale (65536, or 2^^16)
111  - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses
112  NOTE: 131072 is a valid scale value. It produces byte resolution of addresses
113  */
114  lloffset =
115  ( unsigned long long ) ( ( address - prof->pr_off ) *
116  prof->pr_scale );
117  indx = ( unsigned long ) ( lloffset >> 17 );
118  }
119 
120  /* confirm addresses within specified range */
121  if ( address >= prof->pr_off ) {
122  /* test first for 16-bit buckets; this should be the fast case */
123  if ( flags & PAPI_PROFIL_BUCKET_16 ) {
124  if ( ( indx * sizeof ( short ) ) < prof->pr_size ) {
125  buf16 = prof->pr_base;
126  buf16[indx] =
127  ( unsigned short ) ( ( unsigned short ) buf16[indx] +
128  profil_increment( buf16[indx], flags,
129  excess,
130  threshold ) );
131  PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx,
132  buf16[indx] );
133  }
134  }
135  /* next, look for the 32-bit case */
136  else if ( flags & PAPI_PROFIL_BUCKET_32 ) {
137  if ( ( indx * sizeof ( int ) ) < prof->pr_size ) {
138  buf32 = prof->pr_base;
139  buf32[indx] = ( unsigned int ) buf32[indx] +
140  ( unsigned int ) profil_increment( buf32[indx], flags,
141  excess, threshold );
142  PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx,
143  buf32[indx] );
144  }
145  }
146  /* finally, fall through to the 64-bit case */
147  else {
148  if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) {
149  buf64 = prof->pr_base;
150  buf64[indx] = ( unsigned long long ) buf64[indx] +
151  ( unsigned long long ) profil_increment( ( long long )
152  buf64[indx], flags,
153  excess,
154  threshold );
155  PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx,
156  buf64[indx] );
157  }
158  }
159  }
160 }
161 
162 void
164  long long over, int profile_index )
165 {
166  EventSetProfileInfo_t *profile = &ESI->profile;
167  PAPI_sprofil_t *sprof;
168  caddr_t offset = 0;
169  caddr_t best_offset = 0;
170  int count;
171  int best_index = -1;
172  int i;
173 
174  PRFDBG( "handled IP %p\n", pc );
175 
176  sprof = profile->prof[profile_index];
177  count = profile->count[profile_index];
178 
179  for ( i = 0; i < count; i++ ) {
180  offset = sprof[i].pr_off;
181  if ( ( offset < pc ) && ( offset > best_offset ) ) {
182  best_index = i;
183  best_offset = offset;
184  }
185  }
186 
187  if ( best_index == -1 )
188  best_index = 0;
189 
190  posix_profil( pc, &sprof[best_index], profile->flags, over,
191  profile->threshold[profile_index] );
192 }
193 
194 /* if isHardware is true, then the processor is using hardware overflow,
195  else it is using software overflow. Use this parameter instead of
196  _papi_hwi_system_info.supports_hw_overflow is in CRAY some processors
197  may use hardware overflow, some may use software overflow.
198 
199  overflow_bit: if the component can get the overflow bit when overflow
200  occurs, then this should be passed by the component;
201 
202  If both genOverflowBit and isHardwareSupport are true, that means
203  the component doesn't know how to get the overflow bit from the
204  kernel directly, so we generate the overflow bit in this function
205  since this function can access the ESI->overflow struct;
206  (The component can only set genOverflowBit parameter to true if the
207  hardware doesn't support multiple hardware overflow. If the
208  component supports multiple hardware overflow and you don't know how
209  to get the overflow bit, then I don't know how to deal with this
210  situation).
211 */
212 
213 int
214 _papi_hwi_dispatch_overflow_signal( void *papiContext, caddr_t address,
215  int *isHardware, long long overflow_bit,
216  int genOverflowBit, ThreadInfo_t ** t,
217  int cidx )
218 {
219  int retval, event_counter, i, overflow_flag, pos;
220  int papi_index, j;
221  int profile_index = 0;
222  long long overflow_vector;
223 
224  long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over;
225  long long latest = 0;
227  EventSetInfo_t *ESI;
228  _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext;
229 
230  OVFDBG( "enter\n" );
231 
232  if ( *t )
233  thread = *t;
234  else
235  *t = thread = _papi_hwi_lookup_thread( 0 );
236 
237  if ( thread != NULL ) {
238  ESI = thread->running_eventset[cidx];
239 
240  if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) {
241  OVFDBG( "Either no eventset or eventset not set to overflow.\n" );
242 #ifdef ANY_THREAD_GETS_SIGNAL
243  _papi_hwi_broadcast_signal( thread->tid );
244 #endif
245  return ( PAPI_OK );
246  }
247 
248  if ( ESI->CmpIdx != cidx )
249  return ( PAPI_ENOCMP );
250 
251  if ( ESI->master != thread ) {
252  PAPIERROR
253  ( "eventset->thread %#lx vs. current thread %#lx mismatch",
254  ESI->master, thread );
255  return ( PAPI_EBUG );
256  }
257 
258  if ( isHardware ) {
259  if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) {
260  ESI->state |= PAPI_PAUSED;
261  *isHardware = 1;
262  } else
263  *isHardware = 0;
264  }
265  /* Get the latest counter value */
266  event_counter = ESI->overflow.event_counter;
267 
268  overflow_flag = 0;
269  overflow_vector = 0;
270 
271  if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) {
272  retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop );
273  if ( retval < PAPI_OK )
274  return ( retval );
275  for ( i = 0; i < event_counter; i++ ) {
276  papi_index = ESI->overflow.EventIndex[i];
277  latest = ESI->sw_stop[papi_index];
278  temp[i] = -1;
279 
280  if ( latest >= ( long long ) ESI->overflow.deadline[i] ) {
281  OVFDBG
282  ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n",
283  latest, ESI->overflow.deadline[i],
284  ESI->overflow.threshold[i] );
285  pos = ESI->EventInfoArray[papi_index].pos[0];
286  overflow_vector ^= ( long long ) 1 << pos;
287  temp[i] = latest - ESI->overflow.deadline[i];
288  overflow_flag = 1;
289  /* adjust the deadline */
290  ESI->overflow.deadline[i] =
291  latest + ESI->overflow.threshold[i];
292  }
293  }
294  } else if ( genOverflowBit ) {
295  /* we had assumed the overflow event can't be derived event */
296  papi_index = ESI->overflow.EventIndex[0];
297 
298  /* suppose the pos is the same as the counter number
299  * (this is not true in Itanium, but itanium doesn't
300  * need us to generate the overflow bit
301  */
302  pos = ESI->EventInfoArray[papi_index].pos[0];
303  overflow_vector = ( long long ) 1 << pos;
304  } else
305  overflow_vector = overflow_bit;
306 
307  if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) {
308  if ( ESI->state & PAPI_PROFILING ) {
309  int k = 0;
310  while ( overflow_vector ) {
311  i = ffsll( overflow_vector ) - 1;
312  for ( j = 0; j < event_counter; j++ ) {
313  papi_index = ESI->overflow.EventIndex[j];
314  /* This loop is here ONLY because Pentium 4 can have tagged *
315  * events that contain more than one counter without being *
316  * derived. You've gotta scan all terms to make sure you *
317  * find the one to profile. */
318  for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0;
319  k++ ) {
320  pos = ESI->EventInfoArray[papi_index].pos[k];
321  if ( i == pos ) {
322  profile_index = j;
323  goto foundit;
324  }
325  }
326  }
327  if ( j == event_counter ) {
328  PAPIERROR
329  ( "BUG! overflow_vector is 0, dropping interrupt" );
330  return ( PAPI_EBUG );
331  }
332 
333  foundit:
334  if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) )
335  over = 0;
336  else
337  over = temp[profile_index];
338  _papi_hwi_dispatch_profile( ESI, address, over,
339  profile_index );
340  overflow_vector ^= ( long long ) 1 << i;
341  }
342  /* do not use overflow_vector after this place */
343  } else {
344  ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address,
345  overflow_vector, ctx->ucontext );
346  }
347  }
348  ESI->state &= ~( PAPI_PAUSED );
349  }
350 #ifdef ANY_THREAD_GETS_SIGNAL
351  else {
352  OVFDBG( "I haven't been noticed by PAPI before\n" );
354  }
355 #endif
356  return ( PAPI_OK );
357 }
358 
359 #include <sys/time.h>
360 #include <errno.h>
361 #include <string.h>
362 
364 
365 int
366 _papi_hwi_start_timer( int timer, int signal, int ns )
367 {
368  struct itimerval value;
369  int us = ns / 1000;
370 
371  if ( us == 0 )
372  us = 1;
373 
374 #ifdef ANY_THREAD_GETS_SIGNAL
376  if ( ( _papi_hwi_using_signal[signal] - 1 ) ) {
377  INTDBG( "itimer already installed\n" );
379  return ( PAPI_OK );
380  }
382 #else
383  ( void ) signal; /*unused */
384 #endif
385 
386  value.it_interval.tv_sec = 0;
387  value.it_interval.tv_usec = us;
388  value.it_value.tv_sec = 0;
389  value.it_value.tv_usec = us;
390 
391  INTDBG( "Installing itimer %d, with %d us interval\n", timer, us );
392  if ( setitimer( timer, &value, NULL ) < 0 ) {
393  PAPIERROR( "setitimer errno %d", errno );
394  return ( PAPI_ESYS );
395  }
396 
397  return ( PAPI_OK );
398 }
399 
400 int
401 _papi_hwi_start_signal( int signal, int need_context, int cidx )
402 {
403  struct sigaction action;
404 
407  if ( _papi_hwi_using_signal[signal] - 1 ) {
408  INTDBG( "_papi_hwi_using_signal is now %d\n",
409  _papi_hwi_using_signal[signal] );
411  return ( PAPI_OK );
412  }
413 
414  memset( &action, 0x00, sizeof ( struct sigaction ) );
415  action.sa_flags = SA_RESTART;
416  action.sa_sigaction =
417  ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]->
418  dispatch_timer;
419  if ( need_context )
420 #if (defined(_BGL) /*|| defined (__bgp__)*/)
421  action.sa_flags |= SIGPWR;
422 #else
423  action.sa_flags |= SA_SIGINFO;
424 #endif
425 
426  INTDBG( "installing signal handler\n" );
427  if ( sigaction( signal, &action, NULL ) < 0 ) {
428  PAPIERROR( "sigaction errno %d", errno );
430  return ( PAPI_ESYS );
431  }
432 
433  INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal,
434  _papi_hwi_using_signal[signal] );
436 
437  return ( PAPI_OK );
438 }
439 
440 int
442 {
444  if ( --_papi_hwi_using_signal[signal] == 0 ) {
445  INTDBG( "removing signal handler\n" );
446  if ( sigaction( signal, NULL, NULL ) == -1 ) {
447  PAPIERROR( "sigaction errno %d", errno );
449  return ( PAPI_ESYS );
450  }
451  }
452 
453  INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal,
454  _papi_hwi_using_signal[signal] );
456 
457  return ( PAPI_OK );
458 }
459 
460 int
461 _papi_hwi_stop_timer( int timer, int signal )
462 {
463  struct itimerval value;
464 
465 #ifdef ANY_THREAD_GETS_SIGNAL
467  if ( _papi_hwi_using_signal[signal] > 1 ) {
468  INTDBG( "itimer in use by another thread\n" );
470  return ( PAPI_OK );
471  }
473 #else
474  ( void ) signal; /*unused */
475 #endif
476 
477  value.it_interval.tv_sec = 0;
478  value.it_interval.tv_usec = 0;
479  value.it_value.tv_sec = 0;
480  value.it_value.tv_usec = 0;
481 
482  INTDBG( "turning off timer\n" );
483  if ( setitimer( timer, &value, NULL ) == -1 ) {
484  PAPIERROR( "setitimer errno %d", errno );
485  return PAPI_ESYS;
486  }
487 
488  return PAPI_OK;
489 }
490 
491 
492 
#if (!defined(HAVE_FFSLL) || defined(__bgp__))
/* Find the first (least significant) set bit in a long long.

   Returns the 1-based position of the lowest set bit of lli, or 0 if lli
   is zero. The value is scanned as an unsigned bit pattern, so negative
   inputs are handled without shifting a signed quantity.
*/

int
ffsll( long long lli )
{
	unsigned long long bits = ( unsigned long long ) lli;
	int position = 1;

	if ( bits == 0 )
		return 0;

	/* bits is non-zero, so a set bit is reached before it is exhausted */
	while ( ( bits & 1ULL ) == 0 ) {
		bits >>= 1;
		position++;
	}
	return position;
}
#endif
#define PAPI_PROFIL_WEIGHTED
Definition: fpapi.h:77
memset(eventId, 0, size)
int errno
int _papi_hwi_using_signal[PAPI_NSIG]
Definition: extras.c:363
long long flags
Definition: iozone.c:12330
#define PRFDBG(format, args...)
Definition: papi_debug.h:69
int ns
Definition: iozone.c:20358
off64_t offset
Definition: iozone.c:1279
#define PAPI_PAUSED
Definition: fpapi.h:31
caddr_t pr_off
Definition: papi.h:580
#define PAPI_PROFIL_RANDOM
Definition: fpapi.h:76
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
#define PAPI_PROFIL_COMPRESS
Definition: fpapi.h:78
#define PAPI_ENOCMP
Definition: fpapi.h:122
#define PAPI_PROFIL_POSIX
Definition: fpapi.h:75
void
Definition: iozone.c:18627
inline_static int profil_increment(long long value, int flags, long long excess, long long threshold)
Definition: extras.c:56
#define PAPI_OVERFLOWING
Definition: fpapi.h:33
void double value
Definition: iozone.c:18781
#define PAPI_PROFILING
Definition: fpapi.h:34
unsigned pr_scale
Definition: papi.h:581
Return codes and api definitions.
#define INTDBG(format, args...)
Definition: papi_debug.h:65
unsigned pr_size
Definition: papi.h:579
t
Definition: iozone.c:23562
#define DEADBEEF
Definition: papi_internal.h:26
int i
Definition: fileop.c:140
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:64
EventSetOverflowInfo_t overflow
static unsigned int _rnum
Definition: extras.c:37
#define PAPI_OVERFLOW_HARDWARE
Definition: papi.h:410
unsigned long int tid
Definition: threads.h:25
struct _ThreadInfo * master
int ffsll(long long lli)
Definition: extras.c:497
int k
Definition: iozone.c:19136
static int cidx
Definition: event_info.c:40
#define OVFDBG(format, args...)
Definition: papi_debug.h:68
int _papi_hwi_read(hwd_context_t *context, EventSetInfo_t *ESI, long long *values)
hwd_ucontext_t * ucontext
PAPI_sprofil_t ** prof
void int num
Definition: iozone.c:22151
int _papi_hwi_stop_timer(int timer, int signal)
Definition: extras.c:461
hwd_context_t ** context
Definition: threads.h:28
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:78
void * thread(void *arg)
Definition: kufrin.c:31
#define PAPI_PROFIL_BUCKET_16
Definition: fpapi.h:79
int _papi_hwi_start_timer(int timer, int signal, int ns)
Definition: extras.c:366
long long
Definition: iozone.c:19827
void PAPIERROR(char *format,...)
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:401
#define inline_static
#define PAPI_NSIG
Definition: papi_internal.h:60
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:441
EventInfo_t * EventInfoArray
unsigned long int(* _papi_hwi_thread_id_fn)(void)
Definition: threads.c:42
#define PAPI_ESYS
Definition: fpapi.h:108
int threshold
static void posix_profil(caddr_t address, PAPI_sprofil_t *prof, int flags, long long excess, long long threshold)
Definition: extras.c:92
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
long long * sw_stop
int _papi_hwi_broadcast_signal(unsigned int mytid)
#define PAPI_EBUG
Definition: fpapi.h:111
#define INTERNAL_LOCK
Definition: papi_internal.h:85
EventSetInfo_t ** running_eventset
Definition: threads.h:30
void _papi_hwi_dispatch_profile(EventSetInfo_t *ESI, caddr_t pc, long long over, int profile_index)
Definition: extras.c:163
int
Definition: iozone.c:18528
int temp
Definition: iozone.c:22158
inline_static ThreadInfo_t * _papi_hwi_lookup_thread(int custom_tid)
Definition: threads.h:92
struct papi_vectors * _papi_hwd[]
inline_static unsigned short random_ushort(void)
Definition: extras.c:44
#define PAPI_PROFIL_BUCKET_32
Definition: fpapi.h:80
void * pr_base
Definition: papi.h:578
int _papi_hwi_dispatch_overflow_signal(void *papiContext, caddr_t address, int *isHardware, long long overflow_bit, int genOverflowBit, ThreadInfo_t **t, int cidx)
Definition: extras.c:214
EventSetProfileInfo_t profile
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
signal(SIGINT, signal_handler)
PAPI_overflow_handler_t handler