PAPI  5.6.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
extras.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
5 /*
6 * File: extras.c
7 * Author: Philip Mucci
8 * mucci@cs.utk.edu
9 * Mods: dan terpstra
10 * terpstra@cs.utk.edu
11 * Mods: Haihang You
12 * you@cs.utk.edu
13 * Mods: Kevin London
14 * london@cs.utk.edu
15 * Mods: Maynard Johnson
16 * maynardj@us.ibm.com
17 */
18 
19 /* This file contains portable routines to do things that we wish the
20 vendors did in the kernel extensions or performance libraries. */
21 
22 #include "papi.h"
23 #include "papi_internal.h"
24 #include "papi_vector.h"
25 #include "papi_memory.h"
26 #include "extras.h"
27 #include "threads.h"
28 
29 #if (!defined(HAVE_FFSLL) || defined(__bgp__))
30 int ffsll( long long lli );
31 #else
32 #include <string.h>
33 #endif
34 
35 /****************/
36 /* BEGIN LOCALS */
37 /****************/
38 
39 static unsigned int _rnum = DEADBEEF;
40 
41 /**************/
42 /* END LOCALS */
43 /**************/
44 
45 inline_static unsigned short
47 {
48  return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 );
49 }
50 
51 
52 /* compute the amount by which to increment the bucket.
53  value is the current value of the bucket
54  this routine is used by all three profiling cases
55  it is inlined for speed
56 */
57 inline_static int
59  int flags, long long excess, long long threshold )
60 {
61  int increment = 1;
62 
63  if ( flags == PAPI_PROFIL_POSIX ) {
64  return ( 1 );
65  }
66 
67  if ( flags & PAPI_PROFIL_RANDOM ) {
68  if ( random_ushort( ) <= ( USHRT_MAX / 4 ) )
69  return ( 0 );
70  }
71 
72  if ( flags & PAPI_PROFIL_COMPRESS ) {
73  /* We're likely to ignore the sample if buf[address] gets big. */
74  if ( random_ushort( ) < value ) {
75  return ( 0 );
76  }
77  }
78 
79  if ( flags & PAPI_PROFIL_WEIGHTED ) { /* Increment is between 1 and 255 */
80  if ( excess <= ( long long ) 1 )
81  increment = 1;
82  else if ( excess > threshold )
83  increment = 255;
84  else {
85  threshold = threshold / ( long long ) 255;
86  increment = ( int ) ( excess / threshold );
87  }
88  }
89  return ( increment );
90 }
91 
92 
93 static void
94 posix_profil( caddr_t address, PAPI_sprofil_t * prof,
95  int flags, long long excess, long long threshold )
96 {
97  unsigned short *buf16;
98  unsigned int *buf32;
99  unsigned long long *buf64;
100  unsigned long indx;
101  unsigned long long lloffset;
102 
103  /* SPECIAL CASE: if starting address is 0 and scale factor is 2
104  then all counts go into first bin.
105  */
106  if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) )
107  indx = 0;
108  else {
109  /* compute the profile buffer offset by:
110  - subtracting the profiling base address from the pc address
111  - multiplying by the scaling factor
112  - dividing by max scale (65536, or 2^^16)
113  - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses
114  NOTE: 131072 is a valid scale value. It produces byte resolution of addresses
115  */
116  lloffset =
117  ( unsigned long long ) ( ( address - prof->pr_off ) *
118  prof->pr_scale );
119  indx = ( unsigned long ) ( lloffset >> 17 );
120  }
121 
122  /* confirm addresses within specified range */
123  if ( address >= prof->pr_off ) {
124  /* test first for 16-bit buckets; this should be the fast case */
125  if ( flags & PAPI_PROFIL_BUCKET_16 ) {
126  if ( ( indx * sizeof ( short ) ) < prof->pr_size ) {
127  buf16 = (unsigned short *) prof->pr_base;
128  buf16[indx] =
129  ( unsigned short ) ( ( unsigned short ) buf16[indx] +
130  profil_increment( buf16[indx], flags,
131  excess,
132  threshold ) );
133  PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx,
134  buf16[indx] );
135  }
136  }
137  /* next, look for the 32-bit case */
138  else if ( flags & PAPI_PROFIL_BUCKET_32 ) {
139  if ( ( indx * sizeof ( int ) ) < prof->pr_size ) {
140  buf32 = (unsigned int *) prof->pr_base;
141  buf32[indx] = ( unsigned int ) buf32[indx] +
142  ( unsigned int ) profil_increment( buf32[indx], flags,
143  excess, threshold );
144  PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx,
145  buf32[indx] );
146  }
147  }
148  /* finally, fall through to the 64-bit case */
149  else {
150  if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) {
151  buf64 = (unsigned long long *) prof->pr_base;
152  buf64[indx] = ( unsigned long long ) buf64[indx] +
153  ( unsigned long long ) profil_increment( ( long long )
154  buf64[indx], flags,
155  excess,
156  threshold );
157  PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx,
158  buf64[indx] );
159  }
160  }
161  }
162 }
163 
164 void
166  long long over, int profile_index )
167 {
168  EventSetProfileInfo_t *profile = &ESI->profile;
169  PAPI_sprofil_t *sprof;
170  caddr_t offset = 0;
171  caddr_t best_offset = 0;
172  int count;
173  int best_index = -1;
174  int i;
175 
176  PRFDBG( "handled IP %p\n", pc );
177 
178  sprof = profile->prof[profile_index];
179  count = profile->count[profile_index];
180 
181  for ( i = 0; i < count; i++ ) {
182  offset = sprof[i].pr_off;
183  if ( ( offset < pc ) && ( offset > best_offset ) ) {
184  best_index = i;
185  best_offset = offset;
186  }
187  }
188 
189  if ( best_index == -1 )
190  best_index = 0;
191 
192  posix_profil( pc, &sprof[best_index], profile->flags, over,
193  profile->threshold[profile_index] );
194 }
195 
196 /* If isHardware is true, then the processor is using hardware overflow;
197  otherwise it is using software overflow. Use this parameter instead of
198  _papi_hwi_system_info.supports_hw_overflow, because on CRAY some processors
199  may use hardware overflow while others use software overflow.
200 
201  overflow_bit: if the component can get the overflow bit when overflow
202  occurs, then this should be passed by the component;
203 
204  If both genOverflowBit and isHardwareSupport are true, that means
205  the component doesn't know how to get the overflow bit from the
206  kernel directly, so we generate the overflow bit in this function,
207  since this function can access the ESI->overflow struct.
208  (The component can only set the genOverflowBit parameter to true if the
209  hardware doesn't support multiple hardware overflow. If the
210  component supports multiple hardware overflow and you don't know how
211  to get the overflow bit, then I don't know how to deal with this
212  situation.)
213 */
214 
/* Dispatch an overflow notification to the profiler or the user handler.
 *
 * Flow, as visible below:
 *  1. Resolve the calling thread (from *t, or via _papi_hwi_lookup_thread,
 *     storing the result back into *t).
 *  2. Fetch the thread's running event set for component cidx; return
 *     PAPI_OK early if there is none or it is not in PAPI_OVERFLOWING state.
 *  3. Software overflow (PAPI_OVERFLOW_HARDWARE not set): read the counters,
 *     compare each overflow event against its deadline, build
 *     overflow_vector, record the overshoot in temp[] and advance the
 *     deadline by the threshold.
 *     Hardware overflow: take overflow_vector from overflow_bit, or
 *     synthesize it from the first overflow event when genOverflowBit is set.
 *  4. If PAPI_PROFILING is set, map every set bit of overflow_vector back to
 *     an overflow event and call _papi_hwi_dispatch_profile(); otherwise
 *     call the user's overflow handler.
 *
 * Returns PAPI_OK, PAPI_ENOCMP (event set belongs to another component),
 * PAPI_EBUG (thread mismatch or unmatched overflow bit), or the negative
 * value returned by _papi_hwi_read().
 *
 * NOTE(review): this listing is missing source lines 228 and 355; 'thread'
 * is used below but its declaration is not visible here.
 */
215 int
216 _papi_hwi_dispatch_overflow_signal( void *papiContext, caddr_t address,
217  int *isHardware, long long overflow_bit,
218  int genOverflowBit, ThreadInfo_t ** t,
219  int cidx )
220 {
221  int retval, event_counter, i, overflow_flag, pos;
222  int papi_index, j;
223  int profile_index = 0;
224  long long overflow_vector;
225 
 /* temp[] holds the per-event overshoot for the software-overflow path.
    NOTE(review): it is sized by num_cntrs but indexed by i < event_counter
    and by profile_index - presumably event_counter <= num_cntrs; confirm. */
226  long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over;
227  long long latest = 0;
229  EventSetInfo_t *ESI;
230  _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext;
231 
232  OVFDBG( "enter\n" );
233 
234  if ( *t )
235  thread = *t;
236  else
237  *t = thread = _papi_hwi_lookup_thread( 0 );
238 
239  if ( thread != NULL ) {
240  ESI = thread->running_eventset[cidx];
241 
242  if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) {
243  OVFDBG( "Either no eventset or eventset not set to overflow.\n" );
244 #ifdef ANY_THREAD_GETS_SIGNAL
245  _papi_hwi_broadcast_signal( thread->tid );
246 #endif
247  return ( PAPI_OK );
248  }
249 
250  if ( ESI->CmpIdx != cidx )
251  return ( PAPI_ENOCMP );
252 
253  if ( ESI->master != thread ) {
254  PAPIERROR
255  ( "eventset->thread %#lx vs. current thread %#lx mismatch",
256  ESI->master, thread );
257  return ( PAPI_EBUG );
258  }
259 
 /* isHardware is an optional out-parameter: report which overflow
    mechanism the event set uses. In the hardware case the event set is
    marked PAPI_PAUSED until the end of this function. */
260  if ( isHardware ) {
261  if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) {
262  ESI->state |= PAPI_PAUSED;
263  *isHardware = 1;
264  } else
265  *isHardware = 0;
266  }
267  /* Get the latest counter value */
268  event_counter = ESI->overflow.event_counter;
269 
270  overflow_flag = 0;
271  overflow_vector = 0;
272 
273  if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) {
274  retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop );
275  if ( retval < PAPI_OK )
276  return ( retval );
277  for ( i = 0; i < event_counter; i++ ) {
278  papi_index = ESI->overflow.EventIndex[i];
279  latest = ESI->sw_stop[papi_index];
 /* -1 marks "this event did not overflow" */
280  temp[i] = -1;
281 
282  if ( latest >= ( long long ) ESI->overflow.deadline[i] ) {
283  OVFDBG
284  ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n",
285  latest, ESI->overflow.deadline[i],
286  ESI->overflow.threshold[i] );
287  pos = ESI->EventInfoArray[papi_index].pos[0];
 /* NOTE(review): XOR toggles the bit; two overflowing events that share
    pos[0] would clear it again. OR may have been intended - confirm. */
288  overflow_vector ^= ( long long ) 1 << pos;
289  temp[i] = latest - ESI->overflow.deadline[i];
290  overflow_flag = 1;
291  /* adjust the deadline */
292  ESI->overflow.deadline[i] =
293  latest + ESI->overflow.threshold[i];
294  }
295  }
296  } else if ( genOverflowBit ) {
297  /* we had assumed the overflow event can't be derived event */
298  papi_index = ESI->overflow.EventIndex[0];
299 
300  /* suppose the pos is the same as the counter number
301  * (this is not true in Itanium, but itanium doesn't
302  * need us to generate the overflow bit
303  */
304  pos = ESI->EventInfoArray[papi_index].pos[0];
305  overflow_vector = ( long long ) 1 << pos;
306  } else
307  overflow_vector = overflow_bit;
308 
309  if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) {
310  if ( ESI->state & PAPI_PROFILING ) {
311  int k = 0;
 /* consume overflow_vector one set bit at a time, lowest bit first */
312  while ( overflow_vector ) {
313  i = ffsll( overflow_vector ) - 1;
314  for ( j = 0; j < event_counter; j++ ) {
315  papi_index = ESI->overflow.EventIndex[j];
316  /* This loop is here ONLY because Pentium 4 can have tagged *
317  * events that contain more than one counter without being *
318  * derived. You've gotta scan all terms to make sure you *
319  * find the one to profile. */
320  for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0;
321  k++ ) {
322  pos = ESI->EventInfoArray[papi_index].pos[k];
323  if ( i == pos ) {
324  profile_index = j;
325  goto foundit;
326  }
327  }
328  }
 /* Reached only when no overflow event owns bit i.
    NOTE(review): the message text says overflow_vector is 0, but
    overflow_vector is non-zero here; the message looks stale. */
329  if ( j == event_counter ) {
330  PAPIERROR
331  ( "BUG! overflow_vector is 0, dropping interrupt" );
332  return ( PAPI_EBUG );
333  }
334 
335  foundit:
 /* hardware overflow carries no overshoot information */
336  if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) )
337  over = 0;
338  else
339  over = temp[profile_index];
340  _papi_hwi_dispatch_profile( ESI, address, over,
341  profile_index );
 /* clear the bit just handled */
342  overflow_vector ^= ( long long ) 1 << i;
343  }
344  /* do not use overflow_vector after this place */
345  } else {
 /* NOTE(review): ctx is dereferenced without a NULL check - presumably
    papiContext is always supplied by the caller; confirm. */
346  ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address,
347  overflow_vector, ctx->ucontext );
348  }
349  }
350  ESI->state &= ~( PAPI_PAUSED );
351  }
352 #ifdef ANY_THREAD_GETS_SIGNAL
353  else {
354  OVFDBG( "I haven't been noticed by PAPI before\n" );
356  }
357 #endif
358  return ( PAPI_OK );
359 }
360 
361 #include <sys/time.h>
362 #include <errno.h>
363 #include <string.h>
364 
366 
/* Arm interval timer 'timer' so that it fires periodically.
 *
 *  timer  - which interval timer to program (passed to setitimer())
 *  signal - only read when ANY_THREAD_GETS_SIGNAL is defined, to index
 *           _papi_hwi_using_signal[]
 *  ns     - requested period in nanoseconds; converted to microseconds
 *           with a minimum of 1 us
 *
 * Returns PAPI_OK on success (or when the early-return test below fires),
 * PAPI_ESYS if setitimer() fails.
 *
 * NOTE(review): source lines 377, 380 and 383 are missing from this
 * listing, so any statements around the ANY_THREAD_GETS_SIGNAL check are
 * not visible here.
 */
367 int
368 _papi_hwi_start_timer( int timer, int signal, int ns )
369 {
370  struct itimerval value;
371  int us = ns / 1000;
372 
 /* a period below one microsecond is rounded up to 1 us */
373  if ( us == 0 )
374  us = 1;
375 
376 #ifdef ANY_THREAD_GETS_SIGNAL
 /* any use count other than exactly 1 takes the early return */
378  if ( ( _papi_hwi_using_signal[signal] - 1 ) ) {
379  INTDBG( "itimer already installed\n" );
381  return ( PAPI_OK );
382  }
384 #else
385  ( void ) signal; /*unused */
386 #endif
387 
 /* Same value for the first expiry and the reload interval.
    NOTE(review): the whole period goes into tv_usec with tv_sec = 0, so
    ns >= 1e9 gives tv_usec >= 1000000 - check that callers never pass a
    period that large. */
388  value.it_interval.tv_sec = 0;
389  value.it_interval.tv_usec = us;
390  value.it_value.tv_sec = 0;
391  value.it_value.tv_usec = us;
392 
393  INTDBG( "Installing itimer %d, with %d us interval\n", timer, us );
394  if ( setitimer( timer, &value, NULL ) < 0 ) {
395  PAPIERROR( "setitimer errno %d", errno );
396  return ( PAPI_ESYS );
397  }
398 
399  return ( PAPI_OK );
400 }
401 
/* Install the component's dispatch_timer routine as the handler for
 * 'signal'.
 *
 *  signal       - signal number to install the handler on; also indexes
 *                 _papi_hwi_using_signal[]
 *  need_context - when non-zero, an extra flag is OR-ed into sa_flags
 *                 (SA_SIGINFO, or SIGPWR under _BGL)
 *  cidx         - component index used to look up dispatch_timer
 *
 * Returns PAPI_OK on success (or when the early-return test below fires),
 * PAPI_ESYS if sigaction() fails.
 *
 * NOTE(review): source lines 407, 408, 412, 431 and 437 are missing from
 * this listing; the statement that updates _papi_hwi_using_signal[signal]
 * before the test below is therefore not visible.
 */
402 int
403 _papi_hwi_start_signal( int signal, int need_context, int cidx )
404 {
405  struct sigaction action;
406 
 /* any use count other than exactly 1 takes the early return */
409  if ( _papi_hwi_using_signal[signal] - 1 ) {
410  INTDBG( "_papi_hwi_using_signal is now %d\n",
411  _papi_hwi_using_signal[signal] );
413  return ( PAPI_OK );
414  }
415 
416  memset( &action, 0x00, sizeof ( struct sigaction ) );
417  action.sa_flags = SA_RESTART;
418  action.sa_sigaction =
419  ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]->
420  dispatch_timer;
 /* The unbraced 'if' governs whichever single statement the preprocessor
    selects below.
    NOTE(review): SIGPWR is a signal number, not an SA_* flag; OR-ing it
    into sa_flags on _BGL looks suspicious - confirm it is intended. */
421  if ( need_context )
422 #if (defined(_BGL) /*|| defined (__bgp__)*/)
423  action.sa_flags |= SIGPWR;
424 #else
425  action.sa_flags |= SA_SIGINFO;
426 #endif
427 
428  INTDBG( "installing signal handler\n" );
429  if ( sigaction( signal, &action, NULL ) < 0 ) {
430  PAPIERROR( "sigaction errno %d", errno );
432  return ( PAPI_ESYS );
433  }
434 
435  INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal,
436  _papi_hwi_using_signal[signal] );
438 
439  return ( PAPI_OK );
440 }
441 
/* Drop one reference on 'signal' and, when the count reaches zero, call
 * sigaction() for it.
 *
 * Returns PAPI_OK, or PAPI_ESYS if sigaction() fails.
 *
 * NOTE(review): the declarator (source line 443) and source lines 445, 450
 * and 457 are missing from this listing; the index gives the signature as
 * int _papi_hwi_stop_signal(int signal).
 */
442 int
444 {
 /* pre-decrement: the branch runs only for the last user of the signal */
446  if ( --_papi_hwi_using_signal[signal] == 0 ) {
447  INTDBG( "removing signal handler\n" );
 /* NOTE(review): with a NULL new action, sigaction() leaves the signal
    disposition unchanged (it only queries), so this call does not remove
    the handler. Restoring SIG_DFL would need a populated struct sigaction -
    confirm intent. */
448  if ( sigaction( signal, NULL, NULL ) == -1 ) {
449  PAPIERROR( "sigaction errno %d", errno );
451  return ( PAPI_ESYS );
452  }
453  }
454 
455  INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal,
456  _papi_hwi_using_signal[signal] );
458 
459  return ( PAPI_OK );
460 }
461 
/* Disarm interval timer 'timer' by programming an all-zero itimerval.
 *
 *  timer  - which interval timer to stop (passed to setitimer())
 *  signal - only read when ANY_THREAD_GETS_SIGNAL is defined, to index
 *           _papi_hwi_using_signal[]
 *
 * Returns PAPI_OK on success (or when the timer is left running for other
 * users), PAPI_ESYS if setitimer() fails.
 *
 * NOTE(review): source lines 468, 471 and 474 are missing from this
 * listing, so any statements around the ANY_THREAD_GETS_SIGNAL check are
 * not visible here.
 */
462 int
463 _papi_hwi_stop_timer( int timer, int signal )
464 {
465  struct itimerval value;
466 
467 #ifdef ANY_THREAD_GETS_SIGNAL
 /* more than one user of the signal: keep the timer armed */
469  if ( _papi_hwi_using_signal[signal] > 1 ) {
470  INTDBG( "itimer in use by another thread\n" );
472  return ( PAPI_OK );
473  }
475 #else
476  ( void ) signal; /*unused */
477 #endif
478 
 /* zero value and zero interval */
479  value.it_interval.tv_sec = 0;
480  value.it_interval.tv_usec = 0;
481  value.it_value.tv_sec = 0;
482  value.it_value.tv_usec = 0;
483 
484  INTDBG( "turning off timer\n" );
485  if ( setitimer( timer, &value, NULL ) == -1 ) {
486  PAPIERROR( "setitimer errno %d", errno );
487  return PAPI_ESYS;
488  }
489 
490  return PAPI_OK;
491 }
492 
493 
494 
495 #if (!defined(HAVE_FFSLL) || defined(__bgp__))
496 /* find the first set bit in long long */
497 
/* Find the first (least significant) set bit in a long long.
 *
 * Returns the 1-based position of the lowest set bit of lli, or 0 when
 * lli is 0 - the same convention ffs() uses for int.
 */
int
ffsll( long long lli )
{
	/* Work on an unsigned copy: shifting a negative signed value is not
	   portable, and the sign bit must be treated like any other bit. */
	unsigned long long bits = ( unsigned long long ) lli;
	int num = ( int ) ( sizeof ( long long ) / sizeof ( int ) );
	int len = ( int ) ( sizeof ( int ) * CHAR_BIT );
	int i, t;

	if ( num == 1 )
		return ( ffs( ( int ) lli ) );

	/* scan int-sized chunks from least to most significant */
	for ( i = 0; i < num; i++ ) {
		/* conversion to unsigned int keeps exactly the low 'len' bits */
		t = ffs( ( int ) ( unsigned int ) bits );
		if ( t ) {
			return ( t + i * len );
		}
		bits >>= len;
	}

	/* no bit set: a bit position, not a PAPI status code */
	return 0;
}
519 #endif
int errno
int _papi_hwi_using_signal[PAPI_NSIG]
Definition: extras.c:365
#define PAPI_OVERFLOWING
Definition: papi.h:378
long long flags
Definition: iozone.c:12330
#define PRFDBG(format, args...)
Definition: papi_debug.h:69
int ns
Definition: iozone.c:20358
off64_t offset
Definition: iozone.c:1279
#define PAPI_PROFIL_BUCKET_16
Definition: papi.h:400
caddr_t pr_off
Definition: papi.h:584
#define PAPI_EBUG
Definition: papi.h:259
return PAPI_OK
Definition: linux-nvml.c:497
int count
Definition: iozone.c:22422
void
Definition: iozone.c:18627
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
inline_static int profil_increment(long long value, int flags, long long excess, long long threshold)
Definition: extras.c:58
void double value
Definition: iozone.c:18781
papi_vector_t * _papi_hwd[]
unsigned pr_scale
Definition: papi.h:585
Return codes and api definitions.
#define INTDBG(format, args...)
Definition: papi_debug.h:65
unsigned pr_size
Definition: papi.h:583
t
Definition: iozone.c:23562
static int cidx
#define DEADBEEF
Definition: papi_internal.h:26
#define PAPI_PROFIL_COMPRESS
Definition: papi.h:399
int i
Definition: fileop.c:140
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:64
EventSetOverflowInfo_t overflow
static unsigned int _rnum
Definition: extras.c:39
#define PAPI_OVERFLOW_HARDWARE
Definition: papi.h:412
unsigned long int tid
Definition: threads.h:25
struct _ThreadInfo * master
int ffsll(long long lli)
Definition: extras.c:499
int k
Definition: iozone.c:19136
#define OVFDBG(format, args...)
Definition: papi_debug.h:68
int _papi_hwi_read(hwd_context_t *context, EventSetInfo_t *ESI, long long *values)
hwd_ucontext_t * ucontext
PAPI_sprofil_t ** prof
void int num
Definition: iozone.c:22151
int _papi_hwi_stop_timer(int timer, int signal)
Definition: extras.c:463
#define PAPI_ESYS
Definition: papi.h:255
hwd_context_t ** context
Definition: threads.h:28
#define PAPI_PROFIL_RANDOM
Definition: papi.h:397
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:78
void * thread(void *arg)
Definition: kufrin.c:38
int _papi_hwi_start_timer(int timer, int signal, int ns)
Definition: extras.c:368
long long
Definition: iozone.c:19827
void PAPIERROR(char *format,...)
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:403
#define inline_static
#define PAPI_NSIG
Definition: papi_internal.h:60
#define PAPI_PROFIL_BUCKET_32
Definition: papi.h:401
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:443
EventInfo_t * EventInfoArray
unsigned long int(* _papi_hwi_thread_id_fn)(void)
Definition: threads.c:42
static int threshold
static void posix_profil(caddr_t address, PAPI_sprofil_t *prof, int flags, long long excess, long long threshold)
Definition: extras.c:94
#define PAPI_ENOCMP
Definition: papi.h:270
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
long long * sw_stop
int _papi_hwi_broadcast_signal(unsigned int mytid)
#define PAPI_PROFIL_POSIX
Definition: papi.h:396
#define PAPI_PROFIL_WEIGHTED
Definition: papi.h:398
#define INTERNAL_LOCK
Definition: papi_internal.h:86
#define PAPI_PROFILING
Definition: papi.h:379
EventSetInfo_t ** running_eventset
Definition: threads.h:30
void _papi_hwi_dispatch_profile(EventSetInfo_t *ESI, caddr_t pc, long long over, int profile_index)
Definition: extras.c:165
int
Definition: iozone.c:18528
int temp
Definition: iozone.c:22158
inline_static ThreadInfo_t * _papi_hwi_lookup_thread(int custom_tid)
Definition: threads.h:92
inline_static unsigned short random_ushort(void)
Definition: extras.c:46
void * pr_base
Definition: papi.h:582
#define PAPI_PAUSED
Definition: papi.h:376
int _papi_hwi_dispatch_overflow_signal(void *papiContext, caddr_t address, int *isHardware, long long overflow_bit, int genOverflowBit, ThreadInfo_t **t, int cidx)
Definition: extras.c:216
EventSetProfileInfo_t profile
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
signal(SIGINT, signal_handler)
PAPI_overflow_handler_t handler