PAPI  5.3.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
x86_cpuid_info.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
5 /*
6 * File: x86_cpuid_info.c
7 * Author: Dan Terpstra
8 * terpstra@eecs.utk.edu
9 * complete rewrite of linux-memory.c to conform to latest docs
10 * and convert Intel to a table driven implementation.
11 * Now also supports multiple TLB descriptors
12 */
13 
14 #include <string.h>
15 #include <stdio.h>
16 #include "papi.h"
17 #include "papi_internal.h"
18 
19 
20 static void init_mem_hierarchy( PAPI_mh_info_t * mh_info );
21 static int init_amd( PAPI_mh_info_t * mh_info, int *levels );
22 static short int _amd_L2_L3_assoc( unsigned short int pattern );
23 static int init_intel( PAPI_mh_info_t * mh_info , int *levels);
24 
/*
 * Execute the CPUID instruction.
 *
 * On entry *a holds the function (leaf) code, which is loaded into EAX.
 * On return *a, *b, *c and *d receive EAX, EBX, ECX and EDX respectively.
 * EBX is saved and restored around the instruction with raw push/pop
 * opcodes; its CPUID result is copied to ESI and delivered through the
 * "=S" output operand.
 *
 * NOTE(review): ECX is not an input operand, so leaves that are selected
 * by a sub-leaf index in ECX cannot be addressed reliably with this helper.
 * NOTE(review): the push/pop moves the stack pointer without the compiler
 * knowing; on x86-64 ABIs with a red zone this may overwrite locals when
 * the helper is inlined into a leaf function -- confirm against the build
 * flags in use.
 */
25 static inline void
26 cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
27 {
28  unsigned int op = *a;
29  // .byte 0x53 == push ebx. it's universal for 32 and 64 bit
30  // .byte 0x5b == pop ebx.
31  // Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
32  // Using the opcode directly avoids this problem.
33  __asm__ __volatile__( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b":"=a"( *a ), "=S"( *b ), "=c"( *c ),
34  "=d"
35  ( *d )
36  : "a"( op ) );
37 }
38 
/*
 * Identify the CPU vendor with CPUID function 0 and fill mh_info through
 * the matching vendor-specific routine (init_intel() or init_amd()).
 *
 * Returns PAPI_ENOIMPL when the vendor string is neither "GenuineIntel"
 * nor "AuthenticAMD"; otherwise returns retval, which is never modified
 * after being initialised to 0.
 *
 * NOTE(review): the embedded listing numbers jump from 39 to 41, so the
 * declarator line (function name and parameter list) is missing from this
 * copy; the body expects a parameter named mh_info.
 * NOTE(review): the status codes returned by init_intel() and init_amd()
 * are discarded -- confirm whether failures should be propagated.
 */
39 int
41 {
42  int retval = 0;
/* The union lets the four result registers be read back as characters. */
43  union
44  {
45  struct
46  {
47  unsigned int ax, bx, cx, dx;
48  } e;
49  char vendor[20]; /* leave room for terminator bytes */
50  } reg;
51 
52  /* Don't use cpu_type to determine the processor.
53  * get the information directly from the chip.
54  */
55  reg.e.ax = 0; /* function code 0: vendor string */
56  /* The vendor string is composed of EBX:EDX:ECX.
57  * by swapping the register addresses in the call below,
58  * the string is correctly composed in the char array.
59  */
60  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.dx, &reg.e.cx );
/* The 12 vendor characters follow the ax member (offset 4, assuming a
 * 4-byte unsigned int); terminate the string right after them. */
61  reg.vendor[16] = 0;
62  MEMDBG( "Vendor: %s\n", &reg.vendor[4] );
63 
/* Clear the hierarchy before the vendor routine fills it in. */
64  init_mem_hierarchy( mh_info );
65 
66  if ( !strncmp( "GenuineIntel", &reg.vendor[4], 12 ) ) {
67  init_intel( mh_info, &mh_info->levels);
68  } else if ( !strncmp( "AuthenticAMD", &reg.vendor[4], 12 ) ) {
69  init_amd( mh_info, &mh_info->levels );
70  } else {
71  MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" );
72  return PAPI_ENOIMPL;
73  }
74 
75  /* This works only because an empty cache element is initialized to 0 */
76  MEMDBG( "Detected L1: %d L2: %d L3: %d\n",
77  mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size,
78  mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size,
79  mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size );
80  return retval;
81 }
82 
/*
 * Zero the numeric fields of every TLB and cache slot in mh_info->level,
 * for all PAPI_MAX_MEM_HIERARCHY_LEVELS levels and PAPI_MH_MAX_LEVELS
 * slots per level.
 *
 * NOTE(review): the embedded listing numbers skip 84, 92 and 95, so the
 * declarator line and two statements of the inner loop are missing from
 * this copy. Going by the forward declaration near the top of the file
 * this is presumably init_mem_hierarchy( PAPI_mh_info_t * mh_info ) --
 * confirm against the original source.
 */
83 static void
85 {
86  int i, j;
87  PAPI_mh_level_t *L = mh_info->level;
88 
89  /* initialize entire memory hierarchy structure to benign values */
90  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
91  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
93  L[i].tlb[j].num_entries = 0;
94  L[i].tlb[j].associativity = 0;
96  L[i].cache[j].size = 0;
97  L[i].cache[j].line_size = 0;
98  L[i].cache[j].num_lines = 0;
99  L[i].cache[j].associativity = 0;
100  }
101  }
102 }
103 
/*
 * Translate the 4-bit associativity code found in the AMD L2/L3 cache and
 * L2 TLB descriptors (CPUID function 0x80000006) into a way count.
 *
 * pattern: encoded associativity field; only 0x0..0xF are defined.
 *
 * Returns the number of ways (0 for code 0), SHRT_MAX for code 0xF
 * (fully associative), or -1 for reserved or out-of-range encodings.
 */
static short int
_amd_L2_L3_assoc( unsigned short int pattern )
{
	/* From "CPUID Specification" #25481 Rev 2.28, April 2008.
	 * static const: the table is emitted once instead of being rebuilt on
	 * the stack on every call. */
	static const short int assoc[16] =
		{ 0, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, SHRT_MAX };

	/* Derive the bound from the table so the two cannot drift apart. */
	if ( pattern >= sizeof ( assoc ) / sizeof ( assoc[0] ) )
		return -1;
	return assoc[pattern];
}
114 
115 /* Cache configuration for AMD Athlon/Duron */
/*
 * Fill the memory hierarchy description for an AMD processor.
 *
 * Queries CPUID extended function 0x80000005 (L1 cache and TLB) and
 * 0x80000006 (L2/L3 cache and L2 TLB) and stores the decoded values in
 * mh_info->level[0..2].
 *
 * mh_info:    hierarchy structure whose level[] array is written.
 * num_levels: receives the highest level index + 1 that has a non-empty
 *             TLB or cache entry.
 *
 * Always returns PAPI_OK.
 */
116 static int
117 init_amd( PAPI_mh_info_t * mh_info, int *num_levels )
118 {
/* reg.byt[] views the four registers byte-wise; the debug output below
 * labels byt[0..3] as eax, [4..7] as ebx, [8..11] as ecx, [12..15] as edx. */
119  union
120  {
121  struct
122  {
123  unsigned int ax, bx, cx, dx;
124  } e;
125  unsigned char byt[16];
126  } reg;
127  int i, j, levels = 0;
128  PAPI_mh_level_t *L = mh_info->level;
129 
130  /*
131  * Layout of CPU information taken from :
132  * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info.
133  */
134 
135  MEMDBG( "Initializing AMD memory info\n" );
136  /* AMD level 1 cache info */
137  reg.e.ax = 0x80000005; /* extended function code 5: L1 Cache and TLB Identifiers */
138  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
139 
140  MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n",
141  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
142  MEMDBG
143  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
144  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
145  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
146  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
147  reg.byt[15] );
148 
149  /* NOTE: We assume L1 cache and TLB always exists */
150  /* L1 TLB info */
151 
152  /* 4MB memory page information; half the number of entries as 2MB */
153  L[0].tlb[0].type = PAPI_MH_TYPE_INST;
154  L[0].tlb[0].num_entries = reg.byt[0] / 2;
155  L[0].tlb[0].page_size = 4096 << 10;
156  L[0].tlb[0].associativity = reg.byt[1];
157 
158  L[0].tlb[1].type = PAPI_MH_TYPE_DATA;
159  L[0].tlb[1].num_entries = reg.byt[2] / 2;
160  L[0].tlb[1].page_size = 4096 << 10;
161  L[0].tlb[1].associativity = reg.byt[3];
162 
163  /* 2MB memory page information */
164  L[0].tlb[2].type = PAPI_MH_TYPE_INST;
165  L[0].tlb[2].num_entries = reg.byt[0];
166  L[0].tlb[2].page_size = 2048 << 10;
167  L[0].tlb[2].associativity = reg.byt[1];
168 
169  L[0].tlb[3].type = PAPI_MH_TYPE_DATA;
170  L[0].tlb[3].num_entries = reg.byt[2];
171  L[0].tlb[3].page_size = 2048 << 10;
172  L[0].tlb[3].associativity = reg.byt[3];
173 
174  /* 4k page information */
175  L[0].tlb[4].type = PAPI_MH_TYPE_INST;
176  L[0].tlb[4].num_entries = reg.byt[4];
177  L[0].tlb[4].page_size = 4 << 10;
178  L[0].tlb[4].associativity = reg.byt[5];
179 
180  L[0].tlb[5].type = PAPI_MH_TYPE_DATA;
181  L[0].tlb[5].num_entries = reg.byt[6];
182  L[0].tlb[5].page_size = 4 << 10;
183  L[0].tlb[5].associativity = reg.byt[7];
184 
/* An L1 TLB associativity byte of 0xff is replaced by SHRT_MAX, the value
 * this file uses for "fully associative". */
185  for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) {
186  if ( L[0].tlb[i].associativity == 0xff )
187  L[0].tlb[i].associativity = SHRT_MAX;
188  }
189 
190  /* L1 D-cache info */
/* NOTE(review): listing number 192 is missing, so the right-hand side of
 * this .type assignment was dropped from this copy; as written the
 * statement chains into the .size assignment on the next line. */
191  L[0].cache[0].type =
193  L[0].cache[0].size = reg.byt[11] << 10;
194  L[0].cache[0].associativity = reg.byt[10];
195  L[0].cache[0].line_size = reg.byt[8];
196  /* Byt[9] is "Lines per tag" */
197  /* Is that == lines per cache? */
198  /* L[0].cache[1].num_lines = reg.byt[9]; */
199  if ( L[0].cache[0].line_size )
200  L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size;
201  MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
202  L[0].cache[0].num_lines );
203 
204  /* L1 I-cache info */
205  L[0].cache[1].type = PAPI_MH_TYPE_INST;
206  L[0].cache[1].size = reg.byt[15] << 10;
207  L[0].cache[1].associativity = reg.byt[14];
208  L[0].cache[1].line_size = reg.byt[12];
209  /* Byt[13] is "Lines per tag" */
210  /* Is that == lines per cache? */
211  /* L[0].cache[1].num_lines = reg.byt[13]; */
212  if ( L[0].cache[1].line_size )
213  L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size;
214  MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
215  L[0].cache[1].num_lines );
216 
/* Same 0xff -> SHRT_MAX mapping for the two L1 cache entries. */
217  for ( i = 0; i < 2; i++ ) {
218  if ( L[0].cache[i].associativity == 0xff )
219  L[0].cache[i].associativity = SHRT_MAX;
220  }
221 
222  /* AMD L2/L3 Cache and L2 TLB info */
223  /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */
224 
225  reg.e.ax = 0x80000006; /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */
226  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
227 
228  MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n",
229  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
230  MEMDBG
231  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
232  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
233  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
234  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
235  reg.byt[15] );
236 
237  /* L2 TLB info */
/* Each L2 TLB field is read as a 12-bit entry count (low byte plus the low
 * nibble of the next byte); the high nibble is an associativity code that
 * _amd_L2_L3_assoc() decodes. */
238 
239  if ( reg.byt[0] | reg.byt[1] ) { /* Level 2 ITLB exists */
240  /* 4MB ITLB page information; half the number of entries as 2MB */
241  L[1].tlb[0].type = PAPI_MH_TYPE_INST;
242  L[1].tlb[0].num_entries =
243  ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
244  L[1].tlb[0].page_size = 4096 << 10;
245  L[1].tlb[0].associativity =
246  _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 );
247 
248  /* 2MB ITLB page information */
249  L[1].tlb[2].type = PAPI_MH_TYPE_INST;
250  L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2;
251  L[1].tlb[2].page_size = 2048 << 10;
252  L[1].tlb[2].associativity = L[1].tlb[0].associativity;
253  }
254 
255  if ( reg.byt[2] | reg.byt[3] ) { /* Level 2 DTLB exists */
256  /* 4MB DTLB page information; half the number of entries as 2MB */
257  L[1].tlb[1].type = PAPI_MH_TYPE_DATA;
258  L[1].tlb[1].num_entries =
259  ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
260  L[1].tlb[1].page_size = 4096 << 10;
261  L[1].tlb[1].associativity =
262  _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 );
263 
264  /* 2MB DTLB page information */
265  L[1].tlb[3].type = PAPI_MH_TYPE_DATA;
266  L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2;
267  L[1].tlb[3].page_size = 2048 << 10;
268  L[1].tlb[3].associativity = L[1].tlb[1].associativity;
269  }
270 
271  /* 4k page information */
272  if ( reg.byt[4] | reg.byt[5] ) { /* Level 2 ITLB exists */
273  L[1].tlb[4].type = PAPI_MH_TYPE_INST;
274  L[1].tlb[4].num_entries =
275  ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
276  L[1].tlb[4].page_size = 4 << 10;
277  L[1].tlb[4].associativity =
278  _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 );
279  }
280  if ( reg.byt[6] | reg.byt[7] ) { /* Level 2 DTLB exists */
281  L[1].tlb[5].type = PAPI_MH_TYPE_DATA;
282  L[1].tlb[5].num_entries =
283  ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
284  L[1].tlb[5].page_size = 4 << 10;
285  L[1].tlb[5].associativity =
286  _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 );
287  }
288 
289  /* AMD Level 2 cache info */
290  if ( reg.e.cx ) {
/* NOTE(review): listing number 292 is missing; the right-hand side of this
 * .type assignment was dropped from this copy. */
291  L[1].cache[0].type =
293  L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 ); /* right shift by 16; multiply by 2^10 */
294  L[1].cache[0].associativity =
295  _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 );
296  L[1].cache[0].line_size = reg.byt[8];
297 /* L[1].cache[0].num_lines = reg.byt[9]&0xF; */
298  if ( L[1].cache[0].line_size )
299  L[1].cache[0].num_lines =
300  L[1].cache[0].size / L[1].cache[0].line_size;
301  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
302  L[1].cache[0].num_lines );
303  }
304 
305  /* AMD Level 3 cache info (shared across cores) */
306  if ( reg.e.dx ) {
/* NOTE(review): listing number 308 is missing; the right-hand side of this
 * .type assignment was dropped from this copy.
 * NOTE(review): the cast to int binds before the << 1, so the shift below
 * is performed on a signed value -- confirm it cannot overflow for large
 * L3 sizes. */
307  L[2].cache[0].type =
309  L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1; /* in blocks of 512KB (2^19) */
310  L[2].cache[0].associativity =
311  _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 );
312  L[2].cache[0].line_size = reg.byt[12];
313 /* L[2].cache[0].num_lines = reg.byt[13]&0xF; */
314  if ( L[2].cache[0].line_size )
315  L[2].cache[0].num_lines =
316  L[2].cache[0].size / L[2].cache[0].line_size;
/* NOTE(review): this L3 message prints L[1].cache[0].num_lines;
 * L[2].cache[0].num_lines is presumably what was intended. */
317  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
318  L[1].cache[0].num_lines );
319  }
320  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
321  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
322  /* Compute the number of levels of hierarchy actually used */
323  if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY ||
324  L[i].cache[j].type != PAPI_MH_TYPE_EMPTY )
325  levels = i + 1;
326  }
327  }
328  *num_levels = levels;
329  return PAPI_OK;
330 }
331 
332  /*
333  * The data from this table now comes from figure 3-17 in
334  * the Intel Architectures Software Reference Manual 2A
335  * (cpuid instruction section)
336  *
337  * Previously the information was provided by
338  * "Intel® Processor Identification and the CPUID Instruction",
339  * Application Note, AP-485, Nov 2008, 241618-033
340  * Updated to AP-485, Aug 2009, 241618-036
341  *
342  * The following data structure and its instantiation tries to
343  * capture all the information in Section 2.1.3 of the above
344  * document. Not all of it is used by PAPI, but it could be.
345  * As the above document is revised, this table should be
346  * updated.
347  */
348 
349 #define TLB_SIZES 3 /* number of different page sizes for a single TLB descriptor */
/*
 * One row of the Intel cache/TLB descriptor table: the properties
 * associated with a single descriptor code.
 *
 * NOTE(review): listing number 350 is missing, so the line that introduces
 * this struct is absent from this copy; it is presumably the
 * `struct _intel_cache_info` used as the element type of intel_cache[] --
 * confirm against the original source.
 */
351 {
352  int descriptor; /* 0x00 - 0xFF: register descriptor code */
353  int level; /* 1 to PAPI_MH_MAX_LEVELS */
354  int type; /* Empty, instr, data, vector, unified | TLB */
355  int size[TLB_SIZES]; /* cache or TLB page size(s) in kB */
356  int associativity; /* SHRT_MAX == fully associative */
357  int sector; /* 1 if cache is sectored; else 0 */
358  int line_size; /* for cache */
359  int entries; /* for TLB */
360 };
361 
362 static struct _intel_cache_info intel_cache[] = {
363 // 0x01
364  {.descriptor = 0x01,
365  .level = 1,
367  .size[0] = 4,
368  .associativity = 4,
369  .entries = 32,
370  },
371 // 0x02
372  {.descriptor = 0x02,
373  .level = 1,
375  .size[0] = 4096,
376  .associativity = SHRT_MAX,
377  .entries = 2,
378  },
379 // 0x03
380  {.descriptor = 0x03,
381  .level = 1,
383  .size[0] = 4,
384  .associativity = 4,
385  .entries = 64,
386  },
387 // 0x04
388  {.descriptor = 0x04,
389  .level = 1,
391  .size[0] = 4096,
392  .associativity = 4,
393  .entries = 8,
394  },
395 // 0x05
396  {.descriptor = 0x05,
397  .level = 1,
399  .size[0] = 4096,
400  .associativity = 4,
401  .entries = 32,
402  },
403 // 0x06
404  {.descriptor = 0x06,
405  .level = 1,
406  .type = PAPI_MH_TYPE_INST,
407  .size[0] = 8,
408  .associativity = 4,
409  .line_size = 32,
410  },
411 // 0x08
412  {.descriptor = 0x08,
413  .level = 1,
414  .type = PAPI_MH_TYPE_INST,
415  .size[0] = 16,
416  .associativity = 4,
417  .line_size = 32,
418  },
419 // 0x09
420  {.descriptor = 0x09,
421  .level = 1,
422  .type = PAPI_MH_TYPE_INST,
423  .size[0] = 32,
424  .associativity = 4,
425  .line_size = 64,
426  },
427 // 0x0A
428  {.descriptor = 0x0A,
429  .level = 1,
430  .type = PAPI_MH_TYPE_DATA,
431  .size[0] = 8,
432  .associativity = 2,
433  .line_size = 32,
434  },
435 // 0x0B
436  {.descriptor = 0x0B,
437  .level = 1,
439  .size[0] = 4096,
440  .associativity = 4,
441  .entries = 4,
442  },
443 // 0x0C
444  {.descriptor = 0x0C,
445  .level = 1,
446  .type = PAPI_MH_TYPE_DATA,
447  .size[0] = 16,
448  .associativity = 4,
449  .line_size = 32,
450  },
451 // 0x0D
452  {.descriptor = 0x0D,
453  .level = 1,
454  .type = PAPI_MH_TYPE_DATA,
455  .size[0] = 16,
456  .associativity = 4,
457  .line_size = 64,
458  },
459 // 0x0E
460  {.descriptor = 0x0E,
461  .level = 1,
462  .type = PAPI_MH_TYPE_DATA,
463  .size[0] = 24,
464  .associativity = 6,
465  .line_size = 64,
466  },
467 // 0x21
468  {.descriptor = 0x21,
469  .level = 2,
470  .type = PAPI_MH_TYPE_UNIFIED,
471  .size[0] = 256,
472  .associativity = 8,
473  .line_size = 64,
474  },
475 // 0x22
476  {.descriptor = 0x22,
477  .level = 3,
478  .type = PAPI_MH_TYPE_UNIFIED,
479  .size[0] = 512,
480  .associativity = 4,
481  .sector = 1,
482  .line_size = 64,
483  },
484 // 0x23
485  {.descriptor = 0x23,
486  .level = 3,
487  .type = PAPI_MH_TYPE_UNIFIED,
488  .size[0] = 1024,
489  .associativity = 8,
490  .sector = 1,
491  .line_size = 64,
492  },
493 // 0x25
494  {.descriptor = 0x25,
495  .level = 3,
496  .type = PAPI_MH_TYPE_UNIFIED,
497  .size[0] = 2048,
498  .associativity = 8,
499  .sector = 1,
500  .line_size = 64,
501  },
502 // 0x29
503  {.descriptor = 0x29,
504  .level = 3,
505  .type = PAPI_MH_TYPE_UNIFIED,
506  .size[0] = 4096,
507  .associativity = 8,
508  .sector = 1,
509  .line_size = 64,
510  },
511 // 0x2C
512  {.descriptor = 0x2C,
513  .level = 1,
514  .type = PAPI_MH_TYPE_DATA,
515  .size[0] = 32,
516  .associativity = 8,
517  .line_size = 64,
518  },
519 // 0x30
520  {.descriptor = 0x30,
521  .level = 1,
522  .type = PAPI_MH_TYPE_INST,
523  .size[0] = 32,
524  .associativity = 8,
525  .line_size = 64,
526  },
527 // 0x39
528  {.descriptor = 0x39,
529  .level = 2,
530  .type = PAPI_MH_TYPE_UNIFIED,
531  .size[0] = 128,
532  .associativity = 4,
533  .sector = 1,
534  .line_size = 64,
535  },
536 // 0x3A
537  {.descriptor = 0x3A,
538  .level = 2,
539  .type = PAPI_MH_TYPE_UNIFIED,
540  .size[0] = 192,
541  .associativity = 6,
542  .sector = 1,
543  .line_size = 64,
544  },
545 // 0x3B
546  {.descriptor = 0x3B,
547  .level = 2,
548  .type = PAPI_MH_TYPE_UNIFIED,
549  .size[0] = 128,
550  .associativity = 2,
551  .sector = 1,
552  .line_size = 64,
553  },
554 // 0x3C
555  {.descriptor = 0x3C,
556  .level = 2,
557  .type = PAPI_MH_TYPE_UNIFIED,
558  .size[0] = 256,
559  .associativity = 4,
560  .sector = 1,
561  .line_size = 64,
562  },
563 // 0x3D
564  {.descriptor = 0x3D,
565  .level = 2,
566  .type = PAPI_MH_TYPE_UNIFIED,
567  .size[0] = 384,
568  .associativity = 6,
569  .sector = 1,
570  .line_size = 64,
571  },
572 // 0x3E
573  {.descriptor = 0x3E,
574  .level = 2,
575  .type = PAPI_MH_TYPE_UNIFIED,
576  .size[0] = 512,
577  .associativity = 4,
578  .sector = 1,
579  .line_size = 64,
580  },
581 // 0x40: no last level cache (??)
582 // 0x41
583  {.descriptor = 0x41,
584  .level = 2,
585  .type = PAPI_MH_TYPE_UNIFIED,
586  .size[0] = 128,
587  .associativity = 4,
588  .line_size = 32,
589  },
590 // 0x42
591  {.descriptor = 0x42,
592  .level = 2,
593  .type = PAPI_MH_TYPE_UNIFIED,
594  .size[0] = 256,
595  .associativity = 4,
596  .line_size = 32,
597  },
598 // 0x43
599  {.descriptor = 0x43,
600  .level = 2,
601  .type = PAPI_MH_TYPE_UNIFIED,
602  .size[0] = 512,
603  .associativity = 4,
604  .line_size = 32,
605  },
606 // 0x44
607  {.descriptor = 0x44,
608  .level = 2,
609  .type = PAPI_MH_TYPE_UNIFIED,
610  .size[0] = 1024,
611  .associativity = 4,
612  .line_size = 32,
613  },
614 // 0x45
615  {.descriptor = 0x45,
616  .level = 2,
617  .type = PAPI_MH_TYPE_UNIFIED,
618  .size[0] = 2048,
619  .associativity = 4,
620  .line_size = 32,
621  },
622 // 0x46
623  {.descriptor = 0x46,
624  .level = 3,
625  .type = PAPI_MH_TYPE_UNIFIED,
626  .size[0] = 4096,
627  .associativity = 4,
628  .line_size = 64,
629  },
630 // 0x47
631  {.descriptor = 0x47,
632  .level = 3,
633  .type = PAPI_MH_TYPE_UNIFIED,
634  .size[0] = 8192,
635  .associativity = 8,
636  .line_size = 64,
637  },
638 // 0x48
639  {.descriptor = 0x48,
640  .level = 2,
641  .type = PAPI_MH_TYPE_UNIFIED,
642  .size[0] = 3072,
643  .associativity = 12,
644  .line_size = 64,
645  },
646 // 0x49 NOTE: for family 0x0F model 0x06 this is level 3
647  {.descriptor = 0x49,
648  .level = 2,
649  .type = PAPI_MH_TYPE_UNIFIED,
650  .size[0] = 4096,
651  .associativity = 16,
652  .line_size = 64,
653  },
654 // 0x4A
655  {.descriptor = 0x4A,
656  .level = 3,
657  .type = PAPI_MH_TYPE_UNIFIED,
658  .size[0] = 6144,
659  .associativity = 12,
660  .line_size = 64,
661  },
662 // 0x4B
663  {.descriptor = 0x4B,
664  .level = 3,
665  .type = PAPI_MH_TYPE_UNIFIED,
666  .size[0] = 8192,
667  .associativity = 16,
668  .line_size = 64,
669  },
670 // 0x4C
671  {.descriptor = 0x4C,
672  .level = 3,
673  .type = PAPI_MH_TYPE_UNIFIED,
674  .size[0] = 12288,
675  .associativity = 12,
676  .line_size = 64,
677  },
678 // 0x4D
679  {.descriptor = 0x4D,
680  .level = 3,
681  .type = PAPI_MH_TYPE_UNIFIED,
682  .size[0] = 16384,
683  .associativity = 16,
684  .line_size = 64,
685  },
686 // 0x4E
687  {.descriptor = 0x4E,
688  .level = 2,
689  .type = PAPI_MH_TYPE_UNIFIED,
690  .size[0] = 6144,
691  .associativity = 24,
692  .line_size = 64,
693  },
694 // 0x4F
695  {.descriptor = 0x4F,
696  .level = 1,
698  .size[0] = 4,
699  .associativity = SHRT_MAX,
700  .entries = 32,
701  },
702 // 0x50
703  {.descriptor = 0x50,
704  .level = 1,
706  .size = {4, 2048, 4096},
707  .associativity = SHRT_MAX,
708  .entries = 64,
709  },
710 // 0x51
711  {.descriptor = 0x51,
712  .level = 1,
714  .size = {4, 2048, 4096},
715  .associativity = SHRT_MAX,
716  .entries = 128,
717  },
718 // 0x52
719  {.descriptor = 0x52,
720  .level = 1,
722  .size = {4, 2048, 4096},
723  .associativity = SHRT_MAX,
724  .entries = 256,
725  },
726 // 0x55
727  {.descriptor = 0x55,
728  .level = 1,
730  .size = {2048, 4096, 0},
731  .associativity = SHRT_MAX,
732  .entries = 7,
733  },
734 // 0x56
735  {.descriptor = 0x56,
736  .level = 1,
738  .size[0] = 4096,
739  .associativity = 4,
740  .entries = 16,
741  },
742 // 0x57
743  {.descriptor = 0x57,
744  .level = 1,
746  .size[0] = 4,
747  .associativity = 4,
748  .entries = 16,
749  },
750 // 0x59
751  {.descriptor = 0x59,
752  .level = 1,
754  .size[0] = 4,
755  .associativity = SHRT_MAX,
756  .entries = 16,
757  },
758 // 0x5A
759  {.descriptor = 0x5A,
760  .level = 1,
762  .size = {2048, 4096, 0},
763  .associativity = 4,
764  .entries = 32,
765  },
766 // 0x5B
767  {.descriptor = 0x5B,
768  .level = 1,
770  .size = {4, 4096, 0},
771  .associativity = SHRT_MAX,
772  .entries = 64,
773  },
774 // 0x5C
775  {.descriptor = 0x5C,
776  .level = 1,
778  .size = {4, 4096, 0},
779  .associativity = SHRT_MAX,
780  .entries = 128,
781  },
782 // 0x5D
783  {.descriptor = 0x5D,
784  .level = 1,
786  .size = {4, 4096, 0},
787  .associativity = SHRT_MAX,
788  .entries = 256,
789  },
790 // 0x60
791  {.descriptor = 0x60,
792  .level = 1,
793  .type = PAPI_MH_TYPE_DATA,
794  .size[0] = 16,
795  .associativity = 8,
796  .sector = 1,
797  .line_size = 64,
798  },
799 // 0x66
800  {.descriptor = 0x66,
801  .level = 1,
802  .type = PAPI_MH_TYPE_DATA,
803  .size[0] = 8,
804  .associativity = 4,
805  .sector = 1,
806  .line_size = 64,
807  },
808 // 0x67
809  {.descriptor = 0x67,
810  .level = 1,
811  .type = PAPI_MH_TYPE_DATA,
812  .size[0] = 16,
813  .associativity = 4,
814  .sector = 1,
815  .line_size = 64,
816  },
817 // 0x68
818  {.descriptor = 0x68,
819  .level = 1,
820  .type = PAPI_MH_TYPE_DATA,
821  .size[0] = 32,
822  .associativity = 4,
823  .sector = 1,
824  .line_size = 64,
825  },
826 // 0x70
827  {.descriptor = 0x70,
828  .level = 1,
829  .type = PAPI_MH_TYPE_TRACE,
830  .size[0] = 12,
831  .associativity = 8,
832  },
833 // 0x71
834  {.descriptor = 0x71,
835  .level = 1,
836  .type = PAPI_MH_TYPE_TRACE,
837  .size[0] = 16,
838  .associativity = 8,
839  },
840 // 0x72
841  {.descriptor = 0x72,
842  .level = 1,
843  .type = PAPI_MH_TYPE_TRACE,
844  .size[0] = 32,
845  .associativity = 8,
846  },
847 // 0x73
848  {.descriptor = 0x73,
849  .level = 1,
850  .type = PAPI_MH_TYPE_TRACE,
851  .size[0] = 64,
852  .associativity = 8,
853  },
854 // 0x78
855  {.descriptor = 0x78,
856  .level = 2,
857  .type = PAPI_MH_TYPE_UNIFIED,
858  .size[0] = 1024,
859  .associativity = 4,
860  .line_size = 64,
861  },
862 // 0x79
863  {.descriptor = 0x79,
864  .level = 2,
865  .type = PAPI_MH_TYPE_UNIFIED,
866  .size[0] = 128,
867  .associativity = 8,
868  .sector = 1,
869  .line_size = 64,
870  },
871 // 0x7A
872  {.descriptor = 0x7A,
873  .level = 2,
874  .type = PAPI_MH_TYPE_UNIFIED,
875  .size[0] = 256,
876  .associativity = 8,
877  .sector = 1,
878  .line_size = 64,
879  },
880 // 0x7B
881  {.descriptor = 0x7B,
882  .level = 2,
883  .type = PAPI_MH_TYPE_UNIFIED,
884  .size[0] = 512,
885  .associativity = 8,
886  .sector = 1,
887  .line_size = 64,
888  },
889 // 0x7C
890  {.descriptor = 0x7C,
891  .level = 2,
892  .type = PAPI_MH_TYPE_UNIFIED,
893  .size[0] = 1024,
894  .associativity = 8,
895  .sector = 1,
896  .line_size = 64,
897  },
898 // 0x7D
899  {.descriptor = 0x7D,
900  .level = 2,
901  .type = PAPI_MH_TYPE_UNIFIED,
902  .size[0] = 2048,
903  .associativity = 8,
904  .line_size = 64,
905  },
906 // 0x7F
907  {.descriptor = 0x7F,
908  .level = 2,
909  .type = PAPI_MH_TYPE_UNIFIED,
910  .size[0] = 512,
911  .associativity = 2,
912  .line_size = 64,
913  },
914 // 0x80
915  {.descriptor = 0x80,
916  .level = 2,
917  .type = PAPI_MH_TYPE_UNIFIED,
918  .size[0] = 512,
919  .associativity = 8,
920  .line_size = 64,
921  },
922 // 0x82
923  {.descriptor = 0x82,
924  .level = 2,
925  .type = PAPI_MH_TYPE_UNIFIED,
926  .size[0] = 256,
927  .associativity = 8,
928  .line_size = 32,
929  },
930 // 0x83
931  {.descriptor = 0x83,
932  .level = 2,
933  .type = PAPI_MH_TYPE_UNIFIED,
934  .size[0] = 512,
935  .associativity = 8,
936  .line_size = 32,
937  },
938 // 0x84
939  {.descriptor = 0x84,
940  .level = 2,
941  .type = PAPI_MH_TYPE_UNIFIED,
942  .size[0] = 1024,
943  .associativity = 8,
944  .line_size = 32,
945  },
946 // 0x85
947  {.descriptor = 0x85,
948  .level = 2,
949  .type = PAPI_MH_TYPE_UNIFIED,
950  .size[0] = 2048,
951  .associativity = 8,
952  .line_size = 32,
953  },
954 // 0x86
955  {.descriptor = 0x86,
956  .level = 2,
957  .type = PAPI_MH_TYPE_UNIFIED,
958  .size[0] = 512,
959  .associativity = 4,
960  .line_size = 64,
961  },
962 // 0x87
963  {.descriptor = 0x87,
964  .level = 2,
965  .type = PAPI_MH_TYPE_UNIFIED,
966  .size[0] = 1024,
967  .associativity = 8,
968  .line_size = 64,
969  },
970 // 0xB0
971  {.descriptor = 0xB0,
972  .level = 1,
974  .size[0] = 4,
975  .associativity = 4,
976  .entries = 128,
977  },
978 // 0xB1 NOTE: This is currently the only instance where .entries
979 // is dependent on .size. It's handled as a code exception.
980 // If other instances appear in the future, the structure
981 // should probably change to accommodate it.
982  {.descriptor = 0xB1,
983  .level = 1,
985  .size = {2048, 4096, 0},
986  .associativity = 4,
987  .entries = 8, /* or 4 if size = 4096 */
988  },
989 // 0xB2
990  {.descriptor = 0xB2,
991  .level = 1,
993  .size[0] = 4,
994  .associativity = 4,
995  .entries = 64,
996  },
997 // 0xB3
998  {.descriptor = 0xB3,
999  .level = 1,
1001  .size[0] = 4,
1002  .associativity = 4,
1003  .entries = 128,
1004  },
1005 // 0xB4
1006  {.descriptor = 0xB4,
1007  .level = 1,
1009  .size[0] = 4,
1010  .associativity = 4,
1011  .entries = 256,
1012  },
1013 // 0xBA
1014  {.descriptor = 0xBA,
1015  .level = 1,
1017  .size[0] = 4,
1018  .associativity = 4,
1019  .entries = 64,
1020  },
1021 // 0xC0
1022  {.descriptor = 0xBA,
1023  .level = 1,
1025  .size = {4,4096},
1026  .associativity = 4,
1027  .entries = 8,
1028  },
1029 // 0xCA
1030  {.descriptor = 0xCA,
1031  .level = 2,
1033  .size[0] = 4,
1034  .associativity = 4,
1035  .entries = 512,
1036  },
1037 // 0xD0
1038  {.descriptor = 0xD0,
1039  .level = 3,
1040  .type = PAPI_MH_TYPE_UNIFIED,
1041  .size[0] = 512,
1042  .associativity = 4,
1043  .line_size = 64,
1044  },
1045 // 0xD1
1046  {.descriptor = 0xD1,
1047  .level = 3,
1048  .type = PAPI_MH_TYPE_UNIFIED,
1049  .size[0] = 1024,
1050  .associativity = 4,
1051  .line_size = 64,
1052  },
1053 // 0xD2
1054  {.descriptor = 0xD2,
1055  .level = 3,
1056  .type = PAPI_MH_TYPE_UNIFIED,
1057  .size[0] = 2048,
1058  .associativity = 4,
1059  .line_size = 64,
1060  },
1061 // 0xD6
1062  {.descriptor = 0xD6,
1063  .level = 3,
1064  .type = PAPI_MH_TYPE_UNIFIED,
1065  .size[0] = 1024,
1066  .associativity = 8,
1067  .line_size = 64,
1068  },
1069 // 0xD7
1070  {.descriptor = 0xD7,
1071  .level = 3,
1072  .type = PAPI_MH_TYPE_UNIFIED,
1073  .size[0] = 2048,
1074  .associativity = 8,
1075  .line_size = 64,
1076  },
1077 // 0xD8
1078  {.descriptor = 0xD8,
1079  .level = 3,
1080  .type = PAPI_MH_TYPE_UNIFIED,
1081  .size[0] = 4096,
1082  .associativity = 8,
1083  .line_size = 64,
1084  },
1085 // 0xDC
1086  {.descriptor = 0xDC,
1087  .level = 3,
1088  .type = PAPI_MH_TYPE_UNIFIED,
1089  .size[0] = 1536,
1090  .associativity = 12,
1091  .line_size = 64,
1092  },
1093 // 0xDD
1094  {.descriptor = 0xDD,
1095  .level = 3,
1096  .type = PAPI_MH_TYPE_UNIFIED,
1097  .size[0] = 3072,
1098  .associativity = 12,
1099  .line_size = 64,
1100  },
1101 // 0xDE
1102  {.descriptor = 0xDE,
1103  .level = 3,
1104  .type = PAPI_MH_TYPE_UNIFIED,
1105  .size[0] = 6144,
1106  .associativity = 12,
1107  .line_size = 64,
1108  },
1109 // 0xE2
1110  {.descriptor = 0xE2,
1111  .level = 3,
1112  .type = PAPI_MH_TYPE_UNIFIED,
1113  .size[0] = 2048,
1114  .associativity = 16,
1115  .line_size = 64,
1116  },
1117 // 0xE3
1118  {.descriptor = 0xE3,
1119  .level = 3,
1120  .type = PAPI_MH_TYPE_UNIFIED,
1121  .size[0] = 4096,
1122  .associativity = 16,
1123  .line_size = 64,
1124  },
1125 // 0xE4
1126  {.descriptor = 0xE4,
1127  .level = 3,
1128  .type = PAPI_MH_TYPE_UNIFIED,
1129  .size[0] = 8192,
1130  .associativity = 16,
1131  .line_size = 64,
1132  },
1133 // 0xEA
1134  {.descriptor = 0xEA,
1135  .level = 3,
1136  .type = PAPI_MH_TYPE_UNIFIED,
1137  .size[0] = 12288,
1138  .associativity = 24,
1139  .line_size = 64,
1140  },
1141 // 0xEB
1142  {.descriptor = 0xEB,
1143  .level = 3,
1144  .type = PAPI_MH_TYPE_UNIFIED,
1145  .size[0] = 18432,
1146  .associativity = 24,
1147  .line_size = 64,
1148  },
1149 // 0xEC
1150  {.descriptor = 0xEC,
1151  .level = 3,
1152  .type = PAPI_MH_TYPE_UNIFIED,
1153  .size[0] = 24576,
1154  .associativity = 24,
1155  .line_size = 64,
1156  },
1157 // 0xF0
1158  {.descriptor = 0xF0,
1159  .level = 1,
1160  .type = PAPI_MH_TYPE_PREF,
1161  .size[0] = 64,
1162  },
1163 // 0xF1
1164  {.descriptor = 0xF1,
1165  .level = 1,
1166  .type = PAPI_MH_TYPE_PREF,
1167  .size[0] = 128,
1168  },
1169 };
1170 
1171 #ifdef DEBUG
/*
 * Debug helper: print every row of intel_cache[] to stdout, one field per
 * line, with all TLB_SIZES size slots on a single line.
 *
 * NOTE(review): listing number 1173 is missing, so the declarator line
 * (function name and parameter list) is absent from this copy.
 */
1172 static void
1174 {
/* k is the number of rows in intel_cache[]. */
1175  int i, j, k =
1176  ( int ) ( sizeof ( intel_cache ) /
1177  sizeof ( struct _intel_cache_info ) );
1178  for ( i = 0; i < k; i++ ) {
1179  printf( "%d.\tDescriptor: %#x\n", i, intel_cache[i].descriptor );
1180  printf( "\t Level: %d\n", intel_cache[i].level );
1181  printf( "\t Type: %d\n", intel_cache[i].type );
1182  printf( "\t Size(s): " );
1183  for ( j = 0; j < TLB_SIZES; j++ )
1184  printf( "%d, ", intel_cache[i].size[j] );
1185  printf( "\n" );
1186  printf( "\t Assoc: %d\n", intel_cache[i].associativity );
1187  printf( "\t Sector: %d\n", intel_cache[i].sector );
1188  printf( "\t Line Size: %d\n", intel_cache[i].line_size );
1189  printf( "\t Entries: %d\n", intel_cache[i].entries );
1190  printf( "\n" );
1191  }
1192 }
1193 #endif
1194 
1195 /* Given a specific cache descriptor, this routine decodes the information from a table
1196  * of such descriptors and fills out one or more records in a PAPI data structure.
1197  * Called only by init_intel()
1198  */
/*
 * Copy one descriptor-table row (d) into the first free TLB or cache slot
 * of the matching level in L. TLB rows are expanded into one slot per
 * non-zero page size; cache rows fill a single slot.
 *
 * NOTE(review): listing numbers 1200, 1204 and 1205 are missing, so the
 * declarator line and the declarations of the locals t and c are absent
 * from this copy; the body uses d (a table row) and L (the level array).
 */
1199 static void
1201 {
1202  int i, next;
/* Table levels are 1-based; the L[] array is indexed from 0. */
1203  int level = d->level - 1;
1206 
1207  if ( d->descriptor == 0x49 ) { /* special case */
1208  unsigned int r_eax, r_ebx, r_ecx, r_edx;
1209  r_eax = 0x1; /* function code 1: family & model */
1210  cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
1211  /* override table for Family F, model 6 only */
/* NOTE(review): level is a zero-based index here (d->level - 1 above), yet
 * the override stores 3; index 2 is presumably what "level 3" needs --
 * confirm. */
1212  if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
1213  level = 3;
1214  }
1215  if ( d->type & PAPI_MH_TYPE_TLB ) {
/* Find the first empty TLB slot; stops at the last slot if none is empty. */
1216  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1217  if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY )
1218  break;
1219  }
1220  /* expand TLB entries for multiple possible page sizes */
1221  for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i];
1222  i++, next++ ) {
1223 // printf("Level %d Descriptor: %#x TLB type %#x next: %d, i: %d\n", level, d->descriptor, d->type, next, i);
1224  t = &L[level].tlb[next];
1225  t->type = PAPI_MH_CACHE_TYPE( d->type );
1226  t->num_entries = d->entries;
1227  t->page_size = d->size[i] << 10; /* minimum page size in KB */
1228  t->associativity = d->associativity;
1229  /* another special case */
1230  if ( d->descriptor == 0xB1 && d->size[i] == 4096 )
1231  t->num_entries = d->entries / 2;
1232  }
1233  } else {
/* Find the first empty cache slot; if none is empty the last slot is
 * overwritten. */
1234  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1235  if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY )
1236  break;
1237  }
1238 // printf("Level %d Descriptor: %#x Cache type %#x next: %d\n", level, d->descriptor, d->type, next);
1239  c = &L[level].cache[next];
1240  c->type = PAPI_MH_CACHE_TYPE( d->type );
1241  c->size = d->size[0] << 10; /* convert from KB to bytes */
1242  c->associativity = d->associativity;
/* Rows without a line size leave line_size and num_lines untouched. */
1243  if ( d->line_size ) {
1244  c->line_size = d->line_size;
1245  c->num_lines = c->size / c->line_size;
1246  }
1247  }
1248 }
1249 
/* Execute the cpuid instruction with EAX = index and ECX = ecx_in and store
 * the four result registers through eax, ebx, ecx and edx.
 *
 * The byte-encoded push/pop (0x53 / 0x5b) saves and restores ebx around the
 * instruction; the ebx result is copied to esi inside the asm and returned
 * from there.
 */
static inline void
cpuid2( unsigned int *eax, unsigned int *ebx,
		unsigned int *ecx, unsigned int *edx,
		unsigned int index, unsigned int ecx_in )
{
	unsigned int out_a, out_b, out_c, out_d;

	__asm__ __volatile__ ( ".byte 0x53\n\t"
						   "cpuid\n\t"
						   "movl %%ebx, %%esi\n\t"
						   ".byte 0x5b"
						   : "=a" ( out_a ), "=S" ( out_b ),
							 "=c" ( out_c ), "=d" ( out_d )
						   : "0" ( index ), "2" ( ecx_in ) );

	*eax = out_a;
	*ebx = out_b;
	*ecx = out_c;
	*edx = out_d;
}
1261 
1262 
1263 static int
1264 init_intel_leaf4( PAPI_mh_info_t * mh_info, int *num_levels )
1265 {
1266 
1267  unsigned int eax, ebx, ecx, edx;
1268  unsigned int maxidx, ecx_in;
1269  int next;
1270 
1271  int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
1272  int cache_linesize,cache_partitions,cache_ways,cache_sets;
1273 
1275 
1276  *num_levels=0;
1277 
1278  cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
1279  maxidx = eax;
1280 
1281  if (maxidx<4) {
1282  MEMDBG("Warning! CPUID Index 4 not supported!\n");
1283  return PAPI_ENOSUPP;
1284  }
1285 
1286  ecx_in=0;
1287  while(1) {
1288  cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
1289 
1290 
1291 
1292  /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */
1293 
1294  cache_type=eax&0x1f;
1295  if (cache_type==0) break;
1296 
1297  cache_level=(eax>>5)&0x3;
1298  cache_selfinit=(eax>>8)&0x1;
1299  cache_fullyassoc=(eax>>9)&0x1;
1300 
1301  cache_linesize=(ebx&0xfff)+1;
1302  cache_partitions=((ebx>>12)&0x3ff)+1;
1303  cache_ways=((ebx>>22)&0x3ff)+1;
1304 
1305  cache_sets=(ecx)+1;
1306 
1307  /* should we export this info?
1308 
1309  cache_maxshare=((eax>>14)&0xfff)+1;
1310  cache_maxpackage=((eax>>26)&0x3f)+1;
1311 
1312  cache_wb=(edx)&1;
1313  cache_inclusive=(edx>>1)&1;
1314  cache_indexing=(edx>>2)&1;
1315  */
1316 
1317  if (cache_level>*num_levels) *num_levels=cache_level;
1318 
1319  /* find next slot available to hold cache info */
1320  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1321  if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break;
1322  }
1323 
1324  c=&(mh_info->level[cache_level-1].cache[next]);
1325 
1326  switch(cache_type) {
1327  case 1: MEMDBG("L%d Data Cache\n",cache_level);
1329  break;
1330  case 2: MEMDBG("L%d Instruction Cache\n",cache_level);
1332  break;
1333  case 3: MEMDBG("L%d Unified Cache\n",cache_level);
1335  break;
1336  }
1337 
1338  if (cache_selfinit) { MEMDBG("\tSelf-init\n"); }
1339  if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); }
1340 
1341  //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare);
1342  //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage);
1343 
1344  MEMDBG("\tCache linesize: %d\n",cache_linesize);
1345 
1346  MEMDBG("\tCache partitions: %d\n",cache_partitions);
1347  MEMDBG("\tCache associaticity: %d\n",cache_ways);
1348 
1349  MEMDBG("\tCache sets: %d\n",cache_sets);
1350  MEMDBG("\tCache size = %dkB\n",
1351  (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
1352 
1353  //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb);
1354  //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive);
1355  //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing);
1356 
1357  c->line_size=cache_linesize;
1358  if (cache_fullyassoc) {
1359  c->associativity=SHRT_MAX;
1360  }
1361  else {
1362  c->associativity=cache_ways;
1363  }
1364  c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
1365  c->num_lines=cache_ways*cache_partitions*cache_sets;
1366 
1367  ecx_in++;
1368  }
1369  return PAPI_OK;
1370 }
1371 
1372 static int
1373 init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels)
1374 {
1375  /* cpuid() returns memory copies of 4 32-bit registers
1376  * this union allows them to be accessed as either registers
1377  * or individual bytes. Remember that Intel is little-endian.
1378  */
1379  union
1380  {
1381  struct
1382  {
1383  unsigned int ax, bx, cx, dx;
1384  } e;
1385  unsigned char descrip[16];
1386  } reg;
1387 
1388  int r; /* register boundary index */
1389  int b; /* byte index into a register */
1390  int i; /* byte index into the descrip array */
1391  int t; /* table index into the static descriptor table */
1392  int count; /* how many times to call cpuid; from eax:lsb */
1393  int size; /* size of the descriptor table */
1394  int last_level = 0; /* how many levels in the cache hierarchy */
1395 
1396  int need_leaf4=0;
1397 
1398  /* All of Intel's cache info is in 1 call to cpuid
1399  * however it is a table lookup :(
1400  */
1401  MEMDBG( "Initializing Intel Cache and TLB descriptors\n" );
1402 
1403 #ifdef DEBUG
1404  if ( ISLEVEL( DEBUG_MEMORY ) )
1406 #endif
1407 
1408  reg.e.ax = 0x2; /* function code 2: cache descriptors */
1409  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
1410 
1411  MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n",
1412  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
1413  MEMDBG
1414  ( ":\nd0: %#x %#x %#x %#x\nd1: %#x %#x %#x %#x\nd2: %#x %#x %#x %#x\nd3: %#x %#x %#x %#x\n",
1415  reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
1416  reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
1417  reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
1418  reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
1419 
1420  count = reg.descrip[0]; /* # times to repeat CPUID call. Not implemented. */
1421 
1422  /* Knights Corner at least returns 0 here */
1423  if (count==0) goto early_exit;
1424 
1425  size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) ); /* # descriptors */
1426  MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
1427  count );
1428  if (count!=1) {
1429  fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count);
1430  }
1431 
1432  for ( r = 0; r < 4; r++ ) { /* walk the registers */
1433  if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */
1434  for ( b = 3; b >= 0; b-- ) { /* walk the descriptor bytes from high to low */
1435  i = r * 4 + b; /* calculate an index into the array of descriptors */
1436  if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */
1437  if ( reg.descrip[i] == 0xff ) {
1438  MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
1439  need_leaf4=1;
1440  return PAPI_ENOSUPP;
1441  /* we might continue instead */
1442  /* in order to get TLB info */
1443  /* continue; */
1444  }
1445  for ( t = 0; t < size; t++ ) { /* walk the descriptor table */
1446  if ( reg.descrip[i] == intel_cache[t].descriptor ) { /* find match */
1447  if ( intel_cache[t].level > last_level )
1448  last_level = intel_cache[t].level;
1449  intel_decode_descriptor( &intel_cache[t],
1450  mh_info->level );
1451  }
1452  }
1453  }
1454  }
1455  }
1456  }
1457 early_exit:
1458  MEMDBG( "# of Levels: %d\n", last_level );
1459  *num_levels=last_level;
1460  if (need_leaf4) {
1461  return PAPI_ENOSUPP;
1462  }
1463  return PAPI_OK;
1464 }
1465 
1466 
1467 static int
1468 init_intel( PAPI_mh_info_t * mh_info, int *levels )
1469 {
1470 
1471  int result;
1472  int num_levels;
1473 
1474  /* try using the oldest leaf2 method first */
1475  result=init_intel_leaf2(mh_info, &num_levels);
1476 
1477  if (result!=PAPI_OK) {
1478  /* All Core2 and newer also support leaf4 detection */
1479  /* Starting with Westmere *only* leaf4 is supported */
1480  result=init_intel_leaf4(mh_info, &num_levels);
1481  }
1482 
1483  *levels=num_levels;
1484  return PAPI_OK;
1485 }
1486 
1487 
1488 /* Returns 1 if hypervisor detected */
1489 /* Returns 0 if none found. */
1490 int
1491 _x86_detect_hypervisor(char *vendor_name)
1492 {
1493  unsigned int eax, ebx, ecx, edx;
1494  char hyper_vendor_id[13];
1495 
1496  cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
1497  /* This is the hypervisor bit, ecx bit 31 */
1498  if (ecx&0x80000000) {
1499  /* There are various values in the 0x4000000X range */
1500  /* It is questionable how standard they are */
1501  /* For now we just return the name. */
1502  cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
1503  memcpy(hyper_vendor_id + 0, &ebx, 4);
1504  memcpy(hyper_vendor_id + 4, &ecx, 4);
1505  memcpy(hyper_vendor_id + 8, &edx, 4);
1506  hyper_vendor_id[12] = '\0';
1507  strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN);
1508  return 1;
1509  }
1510  else {
1511  strncpy(vendor_name,"none",PAPI_MAX_STR_LEN);
1512  }
1513  return 0;
1514 }
1515 
1516 
1517 
1518 
1519 
static void intel_decode_descriptor(struct _intel_cache_info *d, PAPI_mh_level_t *L)
static void print_intel_cache_table()
int levels
Definition: papi.h:769
#define PAPI_ENOIMPL
Definition: fpapi.h:124
#define PAPI_MH_TYPE_INST
Definition: papi.h:725
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
static struct _intel_cache_info intel_cache[]
static int init_intel(PAPI_mh_info_t *mh_info, int *levels)
#define PAPI_MH_TYPE_WB
Definition: papi.h:732
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
static int init_intel_leaf2(PAPI_mh_info_t *mh_info, int *num_levels)
int size[TLB_SIZES]
int associativity
Definition: papi.h:748
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
#define PAPI_MH_TYPE_PSEUDO_LRU
Definition: papi.h:736
#define DEBUG_MEMORY
Definition: papi_debug.h:34
#define printf
Definition: papi_test.h:125
double c
Definition: multiplex.c:22
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
#define PAPI_MH_TYPE_TRACE
Definition: papi.h:728
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
Definition: papi.h:726
#define PAPI_MH_TYPE_TLB
Definition: papi.h:738
t
Definition: iozone.c:23562
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:763
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
int _x86_cache_info(PAPI_mh_info_t *mh_info)
char *long long size
Definition: iozone.c:12023
#define TLB_SIZES
int k
Definition: iozone.c:19136
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:770
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:762
static void init_mem_hierarchy(PAPI_mh_info_t *mh_info)
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
mh for mem hierarchy maybe?
Definition: papi.h:768
int _x86_detect_hypervisor(char *vendor_name)
nsize_list next
Definition: iozone.c:20053
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38
int
Definition: iozone.c:18528
#define ISLEVEL(a)
Definition: papi_debug.h:54
static int init_intel_leaf4(PAPI_mh_info_t *mh_info, int *num_levels)
static short int _amd_L2_L3_assoc(unsigned short int pattern)
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:729
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:724
#define PAPI_MH_TYPE_PREF
Definition: papi.h:739
#define PAPI_MH_TYPE_WT
Definition: papi.h:731
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
#define PAPI_MH_MAX_LEVELS
Definition: fpapi.h:87
static int init_amd(PAPI_mh_info_t *mh_info, int *levels)
#define PAPI_MH_CACHE_TYPE(a)
Definition: papi.h:730
int int op
Definition: iozone.c:19389
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
Definition: papi.h:741
unsigned int pattern
Definition: iozone.c:1531