PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
x86_cpuid_info.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
5 /*
6 * File: x86_cpuid_info.c
7 * Author: Dan Terpstra
8 * terpstra@eecs.utk.edu
9 * complete rewrite of linux-memory.c to conform to latest docs
10 * and convert Intel to a table driven implementation.
11 * Now also supports multiple TLB descriptors
12 */
13 
14 #include <string.h>
15 #include <stdio.h>
16 #include "papi.h"
17 #include "papi_internal.h"
18 
19 
20 static void init_mem_hierarchy( PAPI_mh_info_t * mh_info );
21 static int init_amd( PAPI_mh_info_t * mh_info, int *levels );
22 static short int _amd_L2_L3_assoc( unsigned short int pattern );
23 static int init_intel( PAPI_mh_info_t * mh_info , int *levels);
24 
25 #if defined( __amd64__ ) || defined (__x86_64__)
/*
 * Execute the CPUID instruction (x86_64 build).
 *
 * On entry *a holds the CPUID function code.  On return *a, *b, *c and *d
 * hold the EAX, EBX, ECX and EDX values produced by the instruction.
 * ECX is not loaded before the instruction executes, so this helper is only
 * suitable for functions that do not take a sub-leaf index in ECX.
 */
static inline void
cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
{
	unsigned int op = *a;

	/* __volatile__ stops the compiler from merging, hoisting or dropping
	 * CPUID queries; the 32-bit variant of this helper is already marked
	 * the same way. */
	__asm__ __volatile__( "cpuid;"
			      : "=a"( *a ), "=b"( *b ), "=c"( *c ), "=d"( *d )
			      : "a"( op ) );
}
34 #else
/*
 * Execute the CPUID instruction (build without __amd64__ / __x86_64__).
 *
 * On entry *a holds the CPUID function code.  On return *a, *b, *c and *d
 * hold the EAX, EBX, ECX and EDX results.  EBX is saved and restored around
 * the instruction with raw push/pop opcodes; its CPUID value is copied to
 * ESI inside the asm and handed back through the "=S" output operand.
 */
35 static inline void
36 cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
37 {
38  unsigned int op = *a;
39  // .byte 0x53 == push ebx. it's universal for 32 and 64 bit
40  // .byte 0x5b == pop ebx.
41  // Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
42  // Using the opcode directly avoids this problem.
   // Sequence: push ebx; cpuid; copy ebx to esi; pop ebx.
43  __asm__ __volatile__( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b":"=a"( *a ), "=S"( *b ), "=c"( *c ),
44  "=d"
45  ( *d )
46  : "a"( op ) );
47 }
48 #endif
49 
/*
 * Entry point that fills in the memory hierarchy description (mh_info) for
 * the running x86 processor.
 *
 * The vendor string is read with CPUID function 0, the hierarchy structure
 * is reset, and the Intel or AMD specific initializer is called.
 * Returns PAPI_ENOIMPL when the vendor is neither "GenuineIntel" nor
 * "AuthenticAMD"; otherwise returns retval, which is never changed from 0.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 51) is not visible in this listing; the body uses a
 * parameter named mh_info.  Confirm against the upstream file.
 */
50 int
52 {
53  int retval = 0;
   /* vendor[] overlays the four register values: ax occupies bytes 0-3,
    * so the 12-character vendor string starts at vendor[4]. */
54  union
55  {
56  struct
57  {
58  unsigned int ax, bx, cx, dx;
59  } e;
60  char vendor[20]; /* leave room for terminator bytes */
61  } reg;
62 
63  /* Don't use cpu_type to determine the processor.
64  * get the information directly from the chip.
65  */
66  reg.e.ax = 0; /* function code 0: vendor string */
67  /* The vendor string is composed of EBX:EDX:ECX.
68  * by swapping the register addresses in the call below,
69  * the string is correctly composed in the char array.
70  */
71  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.dx, &reg.e.cx );
   /* terminate right after the 12 vendor bytes (vendor[4..15]) */
72  reg.vendor[16] = 0;
73  MEMDBG( "Vendor: %s\n", &reg.vendor[4] );
74 
75  init_mem_hierarchy( mh_info );
76 
   /* NOTE(review): the return values of init_intel() and init_amd() are
    * not stored, so a failure in either is not reflected in retval. */
77  if ( !strncmp( "GenuineIntel", &reg.vendor[4], 12 ) ) {
78  init_intel( mh_info, &mh_info->levels);
79  } else if ( !strncmp( "AuthenticAMD", &reg.vendor[4], 12 ) ) {
80  init_amd( mh_info, &mh_info->levels );
81  } else {
82  MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" );
83  return PAPI_ENOIMPL;
84  }
85 
86  /* This works only because an empty cache element is initialized to 0 */
87  MEMDBG( "Detected L1: %d L2: %d L3: %d\n",
88  mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size,
89  mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size,
90  mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size );
91  return retval;
92 }
93 
94 static void
96 {
97  int i, j;
98  PAPI_mh_level_t *L = mh_info->level;
99 
100  /* initialize entire memory hierarchy structure to benign values */
101  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
102  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
103  L[i].tlb[j].type = PAPI_MH_TYPE_EMPTY;
104  L[i].tlb[j].num_entries = 0;
105  L[i].tlb[j].associativity = 0;
107  L[i].cache[j].size = 0;
108  L[i].cache[j].line_size = 0;
109  L[i].cache[j].num_lines = 0;
110  L[i].cache[j].associativity = 0;
111  }
112  }
113 }
114 
/*
 * Decode the 4-bit associativity code found in the AMD L2/L3 cache and
 * L2 TLB identifier registers.
 *
 * pattern: the encoded field, expected to be in the range 0x0-0xF.
 * Returns the decoded associativity from the table below: SHRT_MAX for
 * code 0xF, 0 for code 0, and -1 for codes the table marks as unassigned
 * or for any pattern above 0xF.
 */
static short int
_amd_L2_L3_assoc( unsigned short int pattern )
{
	/* From "CPUID Specification" #25481 Rev 2.28, April 2008 */
	/* static const: the table is built once, not on every call */
	static const short int assoc[16] =
		{ 0, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, SHRT_MAX };

	if ( pattern >= sizeof assoc / sizeof assoc[0] )
		return -1;
	return assoc[pattern];
}
125 
126 /* Cache configuration for AMD Athlon/Duron */
/*
 * Fill in the memory hierarchy description for AMD processors.
 *
 * Runs CPUID extended function 0x80000005 (L1 cache and TLB identifiers)
 * and 0x80000006 (L2/L3 cache and L2 TLB identifiers) and decodes the
 * returned registers into mh_info->level[].
 *
 * mh_info:    hierarchy structure to fill; this function only writes the
 *             slots it decodes and relies on the rest already being empty.
 * num_levels: receives the highest level index + 1 that ended up with a
 *             non-empty TLB or cache slot.
 * Always returns PAPI_OK.
 */
127 static int
128 init_amd( PAPI_mh_info_t * mh_info, int *num_levels )
129 {
    /* byt[] overlays the registers: byt[0..3] = ax, byt[4..7] = bx,
     * byt[8..11] = cx, byt[12..15] = dx (as labelled in the debug dumps
     * below). */
130  union
131  {
132  struct
133  {
134  unsigned int ax, bx, cx, dx;
135  } e;
136  unsigned char byt[16];
137  } reg;
138  int i, j, levels = 0;
139  PAPI_mh_level_t *L = mh_info->level;
140 
141  /*
142  * Layout of CPU information taken from :
143  * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info.
144  */
145 
146  MEMDBG( "Initializing AMD memory info\n" );
147  /* AMD level 1 cache info */
148  reg.e.ax = 0x80000005; /* extended function code 5: L1 Cache and TLB Identifiers */
149  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
150 
151  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
152  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
153  MEMDBG
154  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
155  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
156  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
157  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
158  reg.byt[15] );
159 
160  /* NOTE: We assume L1 cache and TLB always exists */
161  /* L1 TLB info */
162 
    /* Slot layout used for level 1 TLBs: [0]/[1] = 4MB inst/data,
     * [2]/[3] = 2MB inst/data, [4]/[5] = 4KB inst/data. */
163  /* 4MB memory page information; half the number of entries as 2MB */
164  L[0].tlb[0].type = PAPI_MH_TYPE_INST;
165  L[0].tlb[0].num_entries = reg.byt[0] / 2;
166  L[0].tlb[0].page_size = 4096 << 10;
167  L[0].tlb[0].associativity = reg.byt[1];
168 
169  L[0].tlb[1].type = PAPI_MH_TYPE_DATA;
170  L[0].tlb[1].num_entries = reg.byt[2] / 2;
171  L[0].tlb[1].page_size = 4096 << 10;
172  L[0].tlb[1].associativity = reg.byt[3];
173 
174  /* 2MB memory page information */
175  L[0].tlb[2].type = PAPI_MH_TYPE_INST;
176  L[0].tlb[2].num_entries = reg.byt[0];
177  L[0].tlb[2].page_size = 2048 << 10;
178  L[0].tlb[2].associativity = reg.byt[1];
179 
180  L[0].tlb[3].type = PAPI_MH_TYPE_DATA;
181  L[0].tlb[3].num_entries = reg.byt[2];
182  L[0].tlb[3].page_size = 2048 << 10;
183  L[0].tlb[3].associativity = reg.byt[3];
184 
185  /* 4k page information */
186  L[0].tlb[4].type = PAPI_MH_TYPE_INST;
187  L[0].tlb[4].num_entries = reg.byt[4];
188  L[0].tlb[4].page_size = 4 << 10;
189  L[0].tlb[4].associativity = reg.byt[5];
190 
191  L[0].tlb[5].type = PAPI_MH_TYPE_DATA;
192  L[0].tlb[5].num_entries = reg.byt[6];
193  L[0].tlb[5].page_size = 4 << 10;
194  L[0].tlb[5].associativity = reg.byt[7];
195 
    /* an L1 associativity byte of 0xff is reported as SHRT_MAX */
196  for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) {
197  if ( L[0].tlb[i].associativity == 0xff )
198  L[0].tlb[i].associativity = SHRT_MAX;
199  }
200 
201  /* L1 D-cache info */
    /* NOTE(review): the right-hand side of the type assignment below
     * (listing line 203) is not visible in this listing; as shown, the
     * statement runs on into the size assignment.  Confirm against the
     * upstream file. */
202  L[0].cache[0].type =
204  L[0].cache[0].size = reg.byt[11] << 10;
205  L[0].cache[0].associativity = reg.byt[10];
206  L[0].cache[0].line_size = reg.byt[8];
207  /* Byt[9] is "Lines per tag" */
208  /* Is that == lines per cache? */
209  /* L[0].cache[1].num_lines = reg.byt[9]; */
    /* num_lines is derived from size / line_size; skipped when the line
     * size is 0 to avoid dividing by zero */
210  if ( L[0].cache[0].line_size )
211  L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size;
212  MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
213  L[0].cache[0].num_lines );
214 
215  /* L1 I-cache info */
216  L[0].cache[1].type = PAPI_MH_TYPE_INST;
217  L[0].cache[1].size = reg.byt[15] << 10;
218  L[0].cache[1].associativity = reg.byt[14];
219  L[0].cache[1].line_size = reg.byt[12];
220  /* Byt[13] is "Lines per tag" */
221  /* Is that == lines per cache? */
222  /* L[0].cache[1].num_lines = reg.byt[13]; */
223  if ( L[0].cache[1].line_size )
224  L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size;
225  MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
226  L[0].cache[1].num_lines );
227 
    /* same 0xff -> SHRT_MAX mapping for the two L1 caches */
228  for ( i = 0; i < 2; i++ ) {
229  if ( L[0].cache[i].associativity == 0xff )
230  L[0].cache[i].associativity = SHRT_MAX;
231  }
232 
233  /* AMD L2/L3 Cache and L2 TLB info */
234  /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */
235 
236  reg.e.ax = 0x80000006; /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */
237  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
238 
239  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
240  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
241  MEMDBG
242  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
243  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
244  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
245  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
246  reg.byt[15] );
247 
248  /* L2 TLB info */
    /* Each L2 TLB field is 16 bits: the low 12 bits are the entry count
     * and the high 4 bits are an associativity code decoded by
     * _amd_L2_L3_assoc(). */
249 
250  if ( reg.byt[0] | reg.byt[1] ) { /* Level 2 ITLB exists */
251  /* 4MB ITLB page information; half the number of entries as 2MB */
252  L[1].tlb[0].type = PAPI_MH_TYPE_INST;
253  L[1].tlb[0].num_entries =
254  ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
255  L[1].tlb[0].page_size = 4096 << 10;
256  L[1].tlb[0].associativity =
257  _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 );
258 
259  /* 2MB ITLB page information */
260  L[1].tlb[2].type = PAPI_MH_TYPE_INST;
261  L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2;
262  L[1].tlb[2].page_size = 2048 << 10;
263  L[1].tlb[2].associativity = L[1].tlb[0].associativity;
264  }
265 
266  if ( reg.byt[2] | reg.byt[3] ) { /* Level 2 DTLB exists */
267  /* 4MB DTLB page information; half the number of entries as 2MB */
268  L[1].tlb[1].type = PAPI_MH_TYPE_DATA;
269  L[1].tlb[1].num_entries =
270  ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
271  L[1].tlb[1].page_size = 4096 << 10;
272  L[1].tlb[1].associativity =
273  _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 );
274 
275  /* 2MB DTLB page information */
276  L[1].tlb[3].type = PAPI_MH_TYPE_DATA;
277  L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2;
278  L[1].tlb[3].page_size = 2048 << 10;
279  L[1].tlb[3].associativity = L[1].tlb[1].associativity;
280  }
281 
282  /* 4k page information */
283  if ( reg.byt[4] | reg.byt[5] ) { /* Level 2 ITLB exists */
284  L[1].tlb[4].type = PAPI_MH_TYPE_INST;
285  L[1].tlb[4].num_entries =
286  ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
287  L[1].tlb[4].page_size = 4 << 10;
288  L[1].tlb[4].associativity =
289  _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 );
290  }
291  if ( reg.byt[6] | reg.byt[7] ) { /* Level 2 DTLB exists */
292  L[1].tlb[5].type = PAPI_MH_TYPE_DATA;
293  L[1].tlb[5].num_entries =
294  ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
295  L[1].tlb[5].page_size = 4 << 10;
296  L[1].tlb[5].associativity =
297  _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 );
298  }
299 
300  /* AMD Level 2 cache info */
301  if ( reg.e.cx ) {
    /* NOTE(review): the right-hand side of the type assignment below
     * (listing line 303) is not visible in this listing; as shown, the
     * statement runs on into the size assignment.  Confirm against the
     * upstream file. */
302  L[1].cache[0].type =
304  L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 ); /* right shift by 16; multiply by 2^10 */
305  L[1].cache[0].associativity =
306  _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 );
307  L[1].cache[0].line_size = reg.byt[8];
308 /* L[1].cache[0].num_lines = reg.byt[9]&0xF; */
309  if ( L[1].cache[0].line_size )
310  L[1].cache[0].num_lines =
311  L[1].cache[0].size / L[1].cache[0].line_size;
312  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
313  L[1].cache[0].num_lines );
314  }
315 
316  /* AMD Level 3 cache info (shared across cores) */
317  if ( reg.e.dx ) {
    /* NOTE(review): the right-hand side of the type assignment below
     * (listing line 319) is not visible in this listing; confirm against
     * the upstream file. */
    /* NOTE(review): in the size expression the ( int ) cast is applied
     * before the << 1, so the shift is done on a signed int; a value with
     * bit 30 or 31 set would overflow.  Consider shifting as unsigned. */
318  L[2].cache[0].type =
320  L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1; /* in blocks of 512KB (2^19) */
321  L[2].cache[0].associativity =
322  _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 );
323  L[2].cache[0].line_size = reg.byt[12];
324 /* L[2].cache[0].num_lines = reg.byt[13]&0xF; */
325  if ( L[2].cache[0].line_size )
326  L[2].cache[0].num_lines =
327  L[2].cache[0].size / L[2].cache[0].line_size;
    /* NOTE(review): this L3 debug message prints L[1].cache[0].num_lines
     * (the L2 value), not the L[2] value computed just above; it looks
     * like a copy-paste slip that only affects debug output. */
328  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
329  L[1].cache[0].num_lines );
330  }
    /* levels ends up as (highest level index with any non-empty slot) + 1 */
331  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
332  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
333  /* Compute the number of levels of hierarchy actually used */
334  if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY ||
335  L[i].cache[j].type != PAPI_MH_TYPE_EMPTY )
336  levels = i + 1;
337  }
338  }
339  *num_levels = levels;
340  return PAPI_OK;
341 }
342 
343  /*
344  * The data from this table now comes from figure 3-17 in
345  * the Intel Architectures Software Reference Manual 2A
346  * (cpuid instruction section)
347  *
348  * Previously the information was provided by
349  * "Intel® Processor Identification and the CPUID Instruction",
350  * Application Note, AP-485, Nov 2008, 241618-033
351  * Updated to AP-485, Aug 2009, 241618-036
352  *
353  * The following data structure and its instantiation tries to
354  * capture all the information in Section 2.1.3 of the above
355  * document. Not all of it is used by PAPI, but it could be.
356  * As the above document is revised, this table should be
357  * updated.
358  */
359 
#define TLB_SIZES 3			 /* number of different page sizes for a single TLB descriptor */

/*
 * One row of the Intel CPUID descriptor lookup table.
 * A row describes either a cache (size[0] and line_size are meaningful) or
 * a TLB (size[] lists up to TLB_SIZES page sizes and entries is meaningful).
 * Unused size[] slots are left at 0.
 */
struct _intel_cache_info
{
	int descriptor;			 /* 0x00 - 0xFF: register descriptor code */
	int level;				 /* 1-based memory hierarchy level (1 = L1) */
	int type;				 /* Empty, instr, data, vector, unified | TLB */
	int size[TLB_SIZES];	 /* cache or TLB page size(s) in kB */
	int associativity;		 /* SHRT_MAX == fully associative */
	int sector;				 /* 1 if cache is sectored; else 0 */
	int line_size;			 /* for cache */
	int entries;			 /* for TLB */
};
372 
373 static struct _intel_cache_info intel_cache[] = {
374 // 0x01
375  {.descriptor = 0x01,
376  .level = 1,
378  .size[0] = 4,
379  .associativity = 4,
380  .entries = 32,
381  },
382 // 0x02
383  {.descriptor = 0x02,
384  .level = 1,
386  .size[0] = 4096,
387  .associativity = SHRT_MAX,
388  .entries = 2,
389  },
390 // 0x03
391  {.descriptor = 0x03,
392  .level = 1,
394  .size[0] = 4,
395  .associativity = 4,
396  .entries = 64,
397  },
398 // 0x04
399  {.descriptor = 0x04,
400  .level = 1,
402  .size[0] = 4096,
403  .associativity = 4,
404  .entries = 8,
405  },
406 // 0x05
407  {.descriptor = 0x05,
408  .level = 1,
410  .size[0] = 4096,
411  .associativity = 4,
412  .entries = 32,
413  },
414 // 0x06
415  {.descriptor = 0x06,
416  .level = 1,
417  .type = PAPI_MH_TYPE_INST,
418  .size[0] = 8,
419  .associativity = 4,
420  .line_size = 32,
421  },
422 // 0x08
423  {.descriptor = 0x08,
424  .level = 1,
425  .type = PAPI_MH_TYPE_INST,
426  .size[0] = 16,
427  .associativity = 4,
428  .line_size = 32,
429  },
430 // 0x09
431  {.descriptor = 0x09,
432  .level = 1,
433  .type = PAPI_MH_TYPE_INST,
434  .size[0] = 32,
435  .associativity = 4,
436  .line_size = 64,
437  },
438 // 0x0A
439  {.descriptor = 0x0A,
440  .level = 1,
441  .type = PAPI_MH_TYPE_DATA,
442  .size[0] = 8,
443  .associativity = 2,
444  .line_size = 32,
445  },
446 // 0x0B
447  {.descriptor = 0x0B,
448  .level = 1,
450  .size[0] = 4096,
451  .associativity = 4,
452  .entries = 4,
453  },
454 // 0x0C
455  {.descriptor = 0x0C,
456  .level = 1,
457  .type = PAPI_MH_TYPE_DATA,
458  .size[0] = 16,
459  .associativity = 4,
460  .line_size = 32,
461  },
462 // 0x0D
463  {.descriptor = 0x0D,
464  .level = 1,
465  .type = PAPI_MH_TYPE_DATA,
466  .size[0] = 16,
467  .associativity = 4,
468  .line_size = 64,
469  },
470 // 0x0E
471  {.descriptor = 0x0E,
472  .level = 1,
473  .type = PAPI_MH_TYPE_DATA,
474  .size[0] = 24,
475  .associativity = 6,
476  .line_size = 64,
477  },
478 // 0x21
479  {.descriptor = 0x21,
480  .level = 2,
481  .type = PAPI_MH_TYPE_UNIFIED,
482  .size[0] = 256,
483  .associativity = 8,
484  .line_size = 64,
485  },
486 // 0x22
487  {.descriptor = 0x22,
488  .level = 3,
489  .type = PAPI_MH_TYPE_UNIFIED,
490  .size[0] = 512,
491  .associativity = 4,
492  .sector = 1,
493  .line_size = 64,
494  },
495 // 0x23
496  {.descriptor = 0x23,
497  .level = 3,
498  .type = PAPI_MH_TYPE_UNIFIED,
499  .size[0] = 1024,
500  .associativity = 8,
501  .sector = 1,
502  .line_size = 64,
503  },
504 // 0x25
505  {.descriptor = 0x25,
506  .level = 3,
507  .type = PAPI_MH_TYPE_UNIFIED,
508  .size[0] = 2048,
509  .associativity = 8,
510  .sector = 1,
511  .line_size = 64,
512  },
513 // 0x29
514  {.descriptor = 0x29,
515  .level = 3,
516  .type = PAPI_MH_TYPE_UNIFIED,
517  .size[0] = 4096,
518  .associativity = 8,
519  .sector = 1,
520  .line_size = 64,
521  },
522 // 0x2C
523  {.descriptor = 0x2C,
524  .level = 1,
525  .type = PAPI_MH_TYPE_DATA,
526  .size[0] = 32,
527  .associativity = 8,
528  .line_size = 64,
529  },
530 // 0x30
531  {.descriptor = 0x30,
532  .level = 1,
533  .type = PAPI_MH_TYPE_INST,
534  .size[0] = 32,
535  .associativity = 8,
536  .line_size = 64,
537  },
538 // 0x39
539  {.descriptor = 0x39,
540  .level = 2,
541  .type = PAPI_MH_TYPE_UNIFIED,
542  .size[0] = 128,
543  .associativity = 4,
544  .sector = 1,
545  .line_size = 64,
546  },
547 // 0x3A
548  {.descriptor = 0x3A,
549  .level = 2,
550  .type = PAPI_MH_TYPE_UNIFIED,
551  .size[0] = 192,
552  .associativity = 6,
553  .sector = 1,
554  .line_size = 64,
555  },
556 // 0x3B
557  {.descriptor = 0x3B,
558  .level = 2,
559  .type = PAPI_MH_TYPE_UNIFIED,
560  .size[0] = 128,
561  .associativity = 2,
562  .sector = 1,
563  .line_size = 64,
564  },
565 // 0x3C
566  {.descriptor = 0x3C,
567  .level = 2,
568  .type = PAPI_MH_TYPE_UNIFIED,
569  .size[0] = 256,
570  .associativity = 4,
571  .sector = 1,
572  .line_size = 64,
573  },
574 // 0x3D
575  {.descriptor = 0x3D,
576  .level = 2,
577  .type = PAPI_MH_TYPE_UNIFIED,
578  .size[0] = 384,
579  .associativity = 6,
580  .sector = 1,
581  .line_size = 64,
582  },
583 // 0x3E
584  {.descriptor = 0x3E,
585  .level = 2,
586  .type = PAPI_MH_TYPE_UNIFIED,
587  .size[0] = 512,
588  .associativity = 4,
589  .sector = 1,
590  .line_size = 64,
591  },
592 // 0x40: no last level cache (??)
593 // 0x41
594  {.descriptor = 0x41,
595  .level = 2,
596  .type = PAPI_MH_TYPE_UNIFIED,
597  .size[0] = 128,
598  .associativity = 4,
599  .line_size = 32,
600  },
601 // 0x42
602  {.descriptor = 0x42,
603  .level = 2,
604  .type = PAPI_MH_TYPE_UNIFIED,
605  .size[0] = 256,
606  .associativity = 4,
607  .line_size = 32,
608  },
609 // 0x43
610  {.descriptor = 0x43,
611  .level = 2,
612  .type = PAPI_MH_TYPE_UNIFIED,
613  .size[0] = 512,
614  .associativity = 4,
615  .line_size = 32,
616  },
617 // 0x44
618  {.descriptor = 0x44,
619  .level = 2,
620  .type = PAPI_MH_TYPE_UNIFIED,
621  .size[0] = 1024,
622  .associativity = 4,
623  .line_size = 32,
624  },
625 // 0x45
626  {.descriptor = 0x45,
627  .level = 2,
628  .type = PAPI_MH_TYPE_UNIFIED,
629  .size[0] = 2048,
630  .associativity = 4,
631  .line_size = 32,
632  },
633 // 0x46
634  {.descriptor = 0x46,
635  .level = 3,
636  .type = PAPI_MH_TYPE_UNIFIED,
637  .size[0] = 4096,
638  .associativity = 4,
639  .line_size = 64,
640  },
641 // 0x47
642  {.descriptor = 0x47,
643  .level = 3,
644  .type = PAPI_MH_TYPE_UNIFIED,
645  .size[0] = 8192,
646  .associativity = 8,
647  .line_size = 64,
648  },
649 // 0x48
650  {.descriptor = 0x48,
651  .level = 2,
652  .type = PAPI_MH_TYPE_UNIFIED,
653  .size[0] = 3072,
654  .associativity = 12,
655  .line_size = 64,
656  },
657 // 0x49 NOTE: for family 0x0F model 0x06 this is level 3
658  {.descriptor = 0x49,
659  .level = 2,
660  .type = PAPI_MH_TYPE_UNIFIED,
661  .size[0] = 4096,
662  .associativity = 16,
663  .line_size = 64,
664  },
665 // 0x4A
666  {.descriptor = 0x4A,
667  .level = 3,
668  .type = PAPI_MH_TYPE_UNIFIED,
669  .size[0] = 6144,
670  .associativity = 12,
671  .line_size = 64,
672  },
673 // 0x4B
674  {.descriptor = 0x4B,
675  .level = 3,
676  .type = PAPI_MH_TYPE_UNIFIED,
677  .size[0] = 8192,
678  .associativity = 16,
679  .line_size = 64,
680  },
681 // 0x4C
682  {.descriptor = 0x4C,
683  .level = 3,
684  .type = PAPI_MH_TYPE_UNIFIED,
685  .size[0] = 12288,
686  .associativity = 12,
687  .line_size = 64,
688  },
689 // 0x4D
690  {.descriptor = 0x4D,
691  .level = 3,
692  .type = PAPI_MH_TYPE_UNIFIED,
693  .size[0] = 16384,
694  .associativity = 16,
695  .line_size = 64,
696  },
697 // 0x4E
698  {.descriptor = 0x4E,
699  .level = 2,
700  .type = PAPI_MH_TYPE_UNIFIED,
701  .size[0] = 6144,
702  .associativity = 24,
703  .line_size = 64,
704  },
705 // 0x4F
706  {.descriptor = 0x4F,
707  .level = 1,
709  .size[0] = 4,
710  .associativity = SHRT_MAX,
711  .entries = 32,
712  },
713 // 0x50
714  {.descriptor = 0x50,
715  .level = 1,
717  .size = {4, 2048, 4096},
718  .associativity = SHRT_MAX,
719  .entries = 64,
720  },
721 // 0x51
722  {.descriptor = 0x51,
723  .level = 1,
725  .size = {4, 2048, 4096},
726  .associativity = SHRT_MAX,
727  .entries = 128,
728  },
729 // 0x52
730  {.descriptor = 0x52,
731  .level = 1,
733  .size = {4, 2048, 4096},
734  .associativity = SHRT_MAX,
735  .entries = 256,
736  },
737 // 0x55
738  {.descriptor = 0x55,
739  .level = 1,
741  .size = {2048, 4096, 0},
742  .associativity = SHRT_MAX,
743  .entries = 7,
744  },
745 // 0x56
746  {.descriptor = 0x56,
747  .level = 1,
749  .size[0] = 4096,
750  .associativity = 4,
751  .entries = 16,
752  },
753 // 0x57
754  {.descriptor = 0x57,
755  .level = 1,
757  .size[0] = 4,
758  .associativity = 4,
759  .entries = 16,
760  },
761 // 0x59
762  {.descriptor = 0x59,
763  .level = 1,
765  .size[0] = 4,
766  .associativity = SHRT_MAX,
767  .entries = 16,
768  },
769 // 0x5A
770  {.descriptor = 0x5A,
771  .level = 1,
773  .size = {2048, 4096, 0},
774  .associativity = 4,
775  .entries = 32,
776  },
777 // 0x5B
778  {.descriptor = 0x5B,
779  .level = 1,
781  .size = {4, 4096, 0},
782  .associativity = SHRT_MAX,
783  .entries = 64,
784  },
785 // 0x5C
786  {.descriptor = 0x5C,
787  .level = 1,
789  .size = {4, 4096, 0},
790  .associativity = SHRT_MAX,
791  .entries = 128,
792  },
793 // 0x5D
794  {.descriptor = 0x5D,
795  .level = 1,
797  .size = {4, 4096, 0},
798  .associativity = SHRT_MAX,
799  .entries = 256,
800  },
801 // 0x60
802  {.descriptor = 0x60,
803  .level = 1,
804  .type = PAPI_MH_TYPE_DATA,
805  .size[0] = 16,
806  .associativity = 8,
807  .sector = 1,
808  .line_size = 64,
809  },
810 // 0x66
811  {.descriptor = 0x66,
812  .level = 1,
813  .type = PAPI_MH_TYPE_DATA,
814  .size[0] = 8,
815  .associativity = 4,
816  .sector = 1,
817  .line_size = 64,
818  },
819 // 0x67
820  {.descriptor = 0x67,
821  .level = 1,
822  .type = PAPI_MH_TYPE_DATA,
823  .size[0] = 16,
824  .associativity = 4,
825  .sector = 1,
826  .line_size = 64,
827  },
828 // 0x68
829  {.descriptor = 0x68,
830  .level = 1,
831  .type = PAPI_MH_TYPE_DATA,
832  .size[0] = 32,
833  .associativity = 4,
834  .sector = 1,
835  .line_size = 64,
836  },
837 // 0x70
838  {.descriptor = 0x70,
839  .level = 1,
840  .type = PAPI_MH_TYPE_TRACE,
841  .size[0] = 12,
842  .associativity = 8,
843  },
844 // 0x71
845  {.descriptor = 0x71,
846  .level = 1,
847  .type = PAPI_MH_TYPE_TRACE,
848  .size[0] = 16,
849  .associativity = 8,
850  },
851 // 0x72
852  {.descriptor = 0x72,
853  .level = 1,
854  .type = PAPI_MH_TYPE_TRACE,
855  .size[0] = 32,
856  .associativity = 8,
857  },
858 // 0x73
859  {.descriptor = 0x73,
860  .level = 1,
861  .type = PAPI_MH_TYPE_TRACE,
862  .size[0] = 64,
863  .associativity = 8,
864  },
865 // 0x78
866  {.descriptor = 0x78,
867  .level = 2,
868  .type = PAPI_MH_TYPE_UNIFIED,
869  .size[0] = 1024,
870  .associativity = 4,
871  .line_size = 64,
872  },
873 // 0x79
874  {.descriptor = 0x79,
875  .level = 2,
876  .type = PAPI_MH_TYPE_UNIFIED,
877  .size[0] = 128,
878  .associativity = 8,
879  .sector = 1,
880  .line_size = 64,
881  },
882 // 0x7A
883  {.descriptor = 0x7A,
884  .level = 2,
885  .type = PAPI_MH_TYPE_UNIFIED,
886  .size[0] = 256,
887  .associativity = 8,
888  .sector = 1,
889  .line_size = 64,
890  },
891 // 0x7B
892  {.descriptor = 0x7B,
893  .level = 2,
894  .type = PAPI_MH_TYPE_UNIFIED,
895  .size[0] = 512,
896  .associativity = 8,
897  .sector = 1,
898  .line_size = 64,
899  },
900 // 0x7C
901  {.descriptor = 0x7C,
902  .level = 2,
903  .type = PAPI_MH_TYPE_UNIFIED,
904  .size[0] = 1024,
905  .associativity = 8,
906  .sector = 1,
907  .line_size = 64,
908  },
909 // 0x7D
910  {.descriptor = 0x7D,
911  .level = 2,
912  .type = PAPI_MH_TYPE_UNIFIED,
913  .size[0] = 2048,
914  .associativity = 8,
915  .line_size = 64,
916  },
917 // 0x7F
918  {.descriptor = 0x7F,
919  .level = 2,
920  .type = PAPI_MH_TYPE_UNIFIED,
921  .size[0] = 512,
922  .associativity = 2,
923  .line_size = 64,
924  },
925 // 0x80
926  {.descriptor = 0x80,
927  .level = 2,
928  .type = PAPI_MH_TYPE_UNIFIED,
929  .size[0] = 512,
930  .associativity = 8,
931  .line_size = 64,
932  },
933 // 0x82
934  {.descriptor = 0x82,
935  .level = 2,
936  .type = PAPI_MH_TYPE_UNIFIED,
937  .size[0] = 256,
938  .associativity = 8,
939  .line_size = 32,
940  },
941 // 0x83
942  {.descriptor = 0x83,
943  .level = 2,
944  .type = PAPI_MH_TYPE_UNIFIED,
945  .size[0] = 512,
946  .associativity = 8,
947  .line_size = 32,
948  },
949 // 0x84
950  {.descriptor = 0x84,
951  .level = 2,
952  .type = PAPI_MH_TYPE_UNIFIED,
953  .size[0] = 1024,
954  .associativity = 8,
955  .line_size = 32,
956  },
957 // 0x85
958  {.descriptor = 0x85,
959  .level = 2,
960  .type = PAPI_MH_TYPE_UNIFIED,
961  .size[0] = 2048,
962  .associativity = 8,
963  .line_size = 32,
964  },
965 // 0x86
966  {.descriptor = 0x86,
967  .level = 2,
968  .type = PAPI_MH_TYPE_UNIFIED,
969  .size[0] = 512,
970  .associativity = 4,
971  .line_size = 64,
972  },
973 // 0x87
974  {.descriptor = 0x87,
975  .level = 2,
976  .type = PAPI_MH_TYPE_UNIFIED,
977  .size[0] = 1024,
978  .associativity = 8,
979  .line_size = 64,
980  },
981 // 0xB0
982  {.descriptor = 0xB0,
983  .level = 1,
985  .size[0] = 4,
986  .associativity = 4,
987  .entries = 128,
988  },
989 // 0xB1 NOTE: This is currently the only instance where .entries
990 // is dependent on .size. It's handled as a code exception.
991 // If other instances appear in the future, the structure
992 // should probably change to accommodate it.
993  {.descriptor = 0xB1,
994  .level = 1,
996  .size = {2048, 4096, 0},
997  .associativity = 4,
998  .entries = 8, /* or 4 if size = 4096 */
999  },
1000 // 0xB2
1001  {.descriptor = 0xB2,
1002  .level = 1,
1004  .size[0] = 4,
1005  .associativity = 4,
1006  .entries = 64,
1007  },
1008 // 0xB3
1009  {.descriptor = 0xB3,
1010  .level = 1,
1012  .size[0] = 4,
1013  .associativity = 4,
1014  .entries = 128,
1015  },
1016 // 0xB4
1017  {.descriptor = 0xB4,
1018  .level = 1,
1020  .size[0] = 4,
1021  .associativity = 4,
1022  .entries = 256,
1023  },
1024 // 0xBA
1025  {.descriptor = 0xBA,
1026  .level = 1,
1028  .size[0] = 4,
1029  .associativity = 4,
1030  .entries = 64,
1031  },
1032 // 0xC0
1033  {.descriptor = 0xBA,
1034  .level = 1,
1036  .size = {4,4096},
1037  .associativity = 4,
1038  .entries = 8,
1039  },
1040 // 0xCA
1041  {.descriptor = 0xCA,
1042  .level = 2,
1044  .size[0] = 4,
1045  .associativity = 4,
1046  .entries = 512,
1047  },
1048 // 0xD0
1049  {.descriptor = 0xD0,
1050  .level = 3,
1051  .type = PAPI_MH_TYPE_UNIFIED,
1052  .size[0] = 512,
1053  .associativity = 4,
1054  .line_size = 64,
1055  },
1056 // 0xD1
1057  {.descriptor = 0xD1,
1058  .level = 3,
1059  .type = PAPI_MH_TYPE_UNIFIED,
1060  .size[0] = 1024,
1061  .associativity = 4,
1062  .line_size = 64,
1063  },
1064 // 0xD2
1065  {.descriptor = 0xD2,
1066  .level = 3,
1067  .type = PAPI_MH_TYPE_UNIFIED,
1068  .size[0] = 2048,
1069  .associativity = 4,
1070  .line_size = 64,
1071  },
1072 // 0xD6
1073  {.descriptor = 0xD6,
1074  .level = 3,
1075  .type = PAPI_MH_TYPE_UNIFIED,
1076  .size[0] = 1024,
1077  .associativity = 8,
1078  .line_size = 64,
1079  },
1080 // 0xD7
1081  {.descriptor = 0xD7,
1082  .level = 3,
1083  .type = PAPI_MH_TYPE_UNIFIED,
1084  .size[0] = 2048,
1085  .associativity = 8,
1086  .line_size = 64,
1087  },
1088 // 0xD8
1089  {.descriptor = 0xD8,
1090  .level = 3,
1091  .type = PAPI_MH_TYPE_UNIFIED,
1092  .size[0] = 4096,
1093  .associativity = 8,
1094  .line_size = 64,
1095  },
1096 // 0xDC
1097  {.descriptor = 0xDC,
1098  .level = 3,
1099  .type = PAPI_MH_TYPE_UNIFIED,
1100  .size[0] = 1536,
1101  .associativity = 12,
1102  .line_size = 64,
1103  },
1104 // 0xDD
1105  {.descriptor = 0xDD,
1106  .level = 3,
1107  .type = PAPI_MH_TYPE_UNIFIED,
1108  .size[0] = 3072,
1109  .associativity = 12,
1110  .line_size = 64,
1111  },
1112 // 0xDE
1113  {.descriptor = 0xDE,
1114  .level = 3,
1115  .type = PAPI_MH_TYPE_UNIFIED,
1116  .size[0] = 6144,
1117  .associativity = 12,
1118  .line_size = 64,
1119  },
1120 // 0xE2
1121  {.descriptor = 0xE2,
1122  .level = 3,
1123  .type = PAPI_MH_TYPE_UNIFIED,
1124  .size[0] = 2048,
1125  .associativity = 16,
1126  .line_size = 64,
1127  },
1128 // 0xE3
1129  {.descriptor = 0xE3,
1130  .level = 3,
1131  .type = PAPI_MH_TYPE_UNIFIED,
1132  .size[0] = 4096,
1133  .associativity = 16,
1134  .line_size = 64,
1135  },
1136 // 0xE4
1137  {.descriptor = 0xE4,
1138  .level = 3,
1139  .type = PAPI_MH_TYPE_UNIFIED,
1140  .size[0] = 8192,
1141  .associativity = 16,
1142  .line_size = 64,
1143  },
1144 // 0xEA
1145  {.descriptor = 0xEA,
1146  .level = 3,
1147  .type = PAPI_MH_TYPE_UNIFIED,
1148  .size[0] = 12288,
1149  .associativity = 24,
1150  .line_size = 64,
1151  },
1152 // 0xEB
1153  {.descriptor = 0xEB,
1154  .level = 3,
1155  .type = PAPI_MH_TYPE_UNIFIED,
1156  .size[0] = 18432,
1157  .associativity = 24,
1158  .line_size = 64,
1159  },
1160 // 0xEC
1161  {.descriptor = 0xEC,
1162  .level = 3,
1163  .type = PAPI_MH_TYPE_UNIFIED,
1164  .size[0] = 24576,
1165  .associativity = 24,
1166  .line_size = 64,
1167  },
1168 // 0xF0
1169  {.descriptor = 0xF0,
1170  .level = 1,
1171  .type = PAPI_MH_TYPE_PREF,
1172  .size[0] = 64,
1173  },
1174 // 0xF1
1175  {.descriptor = 0xF1,
1176  .level = 1,
1177  .type = PAPI_MH_TYPE_PREF,
1178  .size[0] = 128,
1179  },
1180 };
1181 
1182 #ifdef DEBUG
/*
 * Debug helper (compiled only when DEBUG is defined): prints every row of
 * the intel_cache table to stdout, one field per line.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 1184) is not visible in this listing.
 */
1183 static void
1185 {
     /* k = number of rows in the intel_cache table */
1186  int i, j, k =
1187  ( int ) ( sizeof ( intel_cache ) /
1188  sizeof ( struct _intel_cache_info ) );
1189  for ( i = 0; i < k; i++ ) {
1190  printf( "%d.\tDescriptor: %#x\n", i, intel_cache[i].descriptor );
1191  printf( "\t Level: %d\n", intel_cache[i].level );
1192  printf( "\t Type: %d\n", intel_cache[i].type );
1193  printf( "\t Size(s): " );
     /* all TLB_SIZES slots are printed, including unused (0) ones */
1194  for ( j = 0; j < TLB_SIZES; j++ )
1195  printf( "%d, ", intel_cache[i].size[j] );
1196  printf( "\n" );
1197  printf( "\t Assoc: %d\n", intel_cache[i].associativity );
1198  printf( "\t Sector: %d\n", intel_cache[i].sector );
1199  printf( "\t Line Size: %d\n", intel_cache[i].line_size );
1200  printf( "\t Entries: %d\n", intel_cache[i].entries );
1201  printf( "\n" );
1202  }
1203 }
1204 #endif
1204 #endif
1205 
1206 /* Given a specific cache descriptor, this routine decodes the information from a table
1207  * of such descriptors and fills out one or more records in a PAPI data structure.
1208  * Called only by init_intel()
1209  */
/*
 * Copies one descriptor table row (d) into the next free TLB or cache slot
 * of the matching hierarchy level in L.  TLB rows are expanded into one
 * slot per non-zero page size; cache rows fill a single slot.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 1211) and the declarations of t and c (listing lines
 * 1215-1216) are not visible in this listing.  The body uses d as a
 * pointer to a table row and L as an array of hierarchy levels.
 */
1210 static void
1212 {
1213  int i, next;
     /* table levels are 1-based; level is the 0-based index into L[] */
1214  int level = d->level - 1;
1217 
1218  if ( d->descriptor == 0x49 ) { /* special case */
1219  unsigned int r_eax, r_ebx, r_ecx, r_edx;
1220  r_eax = 0x1; /* function code 1: family & model */
1221  cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
1222  /* override table for Family F, model 6 only */
     /* NOTE(review): level is a 0-based index everywhere else in this
      * function, so 3 selects L[3] (the fourth level), whereas the table
      * note for 0x49 speaks of "level 3".  Possible off-by-one; confirm. */
1223  if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
1224  level = 3;
1225  }
1226  if ( d->type & PAPI_MH_TYPE_TLB ) {
     /* find the first empty TLB slot; the scan stops at the last slot,
      * so if none is empty the last slot is the one that gets written */
1227  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1228  if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY )
1229  break;
1230  }
1231  /* expand TLB entries for multiple possible page sizes */
1232  for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i];
1233  i++, next++ ) {
1234 // printf("Level %d Descriptor: %#x TLB type %#x next: %d, i: %d\n", level, d->descriptor, d->type, next, i);
1235  t = &L[level].tlb[next];
1236  t->type = PAPI_MH_CACHE_TYPE( d->type );
1237  t->num_entries = d->entries;
1238  t->page_size = d->size[i] << 10; /* minimum page size in KB */
1239  t->associativity = d->associativity;
1240  /* another special case */
     /* descriptor 0xB1 has half as many entries for its 4096 kB pages */
1241  if ( d->descriptor == 0xB1 && d->size[i] == 4096 )
1242  t->num_entries = d->entries / 2;
1243  }
1244  } else {
     /* same first-empty-slot scan as above, for the cache array */
1245  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1246  if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY )
1247  break;
1248  }
1249 // printf("Level %d Descriptor: %#x Cache type %#x next: %d\n", level, d->descriptor, d->type, next);
1250  c = &L[level].cache[next];
1251  c->type = PAPI_MH_CACHE_TYPE( d->type );
1252  c->size = d->size[0] << 10; /* convert from KB to bytes */
1253  c->associativity = d->associativity;
     /* rows without a line size leave line_size and num_lines untouched */
1254  if ( d->line_size ) {
1255  c->line_size = d->line_size;
1256  c->num_lines = c->size / c->line_size;
1257  }
1258  }
1259 }
1260 
/* Execute the cpuid instruction for leaf 'index' and sub-leaf 'ecx_in' and
 * store the four result registers through eax, ebx, ecx and edx.
 */
static inline void
cpuid2( unsigned int *eax, unsigned int *ebx,
		unsigned int *ecx, unsigned int *edx,
		unsigned int index, unsigned int ecx_in )
{
#if defined(__amd64__) || defined(__x86_64__)
	__asm__ __volatile__ ( "cpuid;"
						   : "=a" ( *eax ), "=b" ( *ebx ),
							 "=c" ( *ecx ), "=d" ( *edx )
						   : "0" ( index ), "2" ( ecx_in ) );
#else
	unsigned int out_a, out_b, out_c, out_d;

	/* .byte 0x53 / .byte 0x5b are push/pop of ebx: ebx is saved around the
	 * instruction and its cpuid value is handed back through esi instead. */
	__asm__ __volatile__ ( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b"
						   : "=a" ( out_a ), "=S" ( out_b ),
							 "=c" ( out_c ), "=d" ( out_d )
						   : "0" ( index ), "2" ( ecx_in ) );
	*eax = out_a;
	*ebx = out_b;
	*ecx = out_c;
	*edx = out_d;
#endif
}
1284 
1285 
1286 static int
1287 init_intel_leaf4( PAPI_mh_info_t * mh_info, int *num_levels )
1288 {
1289 
1290  unsigned int eax, ebx, ecx, edx;
1291  unsigned int maxidx, ecx_in;
1292  int next;
1293 
1294  int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
1295  int cache_linesize,cache_partitions,cache_ways,cache_sets;
1296 
1298 
1299  *num_levels=0;
1300 
1301  cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
1302  maxidx = eax;
1303 
1304  if (maxidx<4) {
1305  MEMDBG("Warning! CPUID Index 4 not supported!\n");
1306  return PAPI_ENOSUPP;
1307  }
1308 
1309  ecx_in=0;
1310  while(1) {
1311  cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
1312 
1313 
1314 
1315  /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */
1316 
1317  cache_type=eax&0x1f;
1318  if (cache_type==0) break;
1319 
1320  cache_level=(eax>>5)&0x3;
1321  cache_selfinit=(eax>>8)&0x1;
1322  cache_fullyassoc=(eax>>9)&0x1;
1323 
1324  cache_linesize=(ebx&0xfff)+1;
1325  cache_partitions=((ebx>>12)&0x3ff)+1;
1326  cache_ways=((ebx>>22)&0x3ff)+1;
1327 
1328  cache_sets=(ecx)+1;
1329 
1330  /* should we export this info?
1331 
1332  cache_maxshare=((eax>>14)&0xfff)+1;
1333  cache_maxpackage=((eax>>26)&0x3f)+1;
1334 
1335  cache_wb=(edx)&1;
1336  cache_inclusive=(edx>>1)&1;
1337  cache_indexing=(edx>>2)&1;
1338  */
1339 
1340  if (cache_level>*num_levels) *num_levels=cache_level;
1341 
1342  /* find next slot available to hold cache info */
1343  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1344  if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break;
1345  }
1346 
1347  c=&(mh_info->level[cache_level-1].cache[next]);
1348 
1349  switch(cache_type) {
1350  case 1: MEMDBG("L%d Data Cache\n",cache_level);
1352  break;
1353  case 2: MEMDBG("L%d Instruction Cache\n",cache_level);
1355  break;
1356  case 3: MEMDBG("L%d Unified Cache\n",cache_level);
1358  break;
1359  }
1360 
1361  if (cache_selfinit) { MEMDBG("\tSelf-init\n"); }
1362  if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); }
1363 
1364  //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare);
1365  //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage);
1366 
1367  MEMDBG("\tCache linesize: %d\n",cache_linesize);
1368 
1369  MEMDBG("\tCache partitions: %d\n",cache_partitions);
1370  MEMDBG("\tCache associaticity: %d\n",cache_ways);
1371 
1372  MEMDBG("\tCache sets: %d\n",cache_sets);
1373  MEMDBG("\tCache size = %dkB\n",
1374  (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
1375 
1376  //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb);
1377  //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive);
1378  //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing);
1379 
1380  c->line_size=cache_linesize;
1381  if (cache_fullyassoc) {
1382  c->associativity=SHRT_MAX;
1383  }
1384  else {
1385  c->associativity=cache_ways;
1386  }
1387  c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
1388  c->num_lines=cache_ways*cache_partitions*cache_sets;
1389 
1390  ecx_in++;
1391  }
1392  return PAPI_OK;
1393 }
1394 
1395 static int
1396 init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels)
1397 {
1398  /* cpuid() returns memory copies of 4 32-bit registers
1399  * this union allows them to be accessed as either registers
1400  * or individual bytes. Remember that Intel is little-endian.
1401  */
1402  union
1403  {
1404  struct
1405  {
1406  unsigned int ax, bx, cx, dx;
1407  } e;
1408  unsigned char descrip[16];
1409  } reg;
1410 
1411  int r; /* register boundary index */
1412  int b; /* byte index into a register */
1413  int i; /* byte index into the descrip array */
1414  int t; /* table index into the static descriptor table */
1415  int count; /* how many times to call cpuid; from eax:lsb */
1416  int size; /* size of the descriptor table */
1417  int last_level = 0; /* how many levels in the cache hierarchy */
1418 
1419  /* All of Intel's cache info is in 1 call to cpuid
1420  * however it is a table lookup :(
1421  */
1422  MEMDBG( "Initializing Intel Cache and TLB descriptors\n" );
1423 
1424 #ifdef DEBUG
1425  if ( ISLEVEL( DEBUG_MEMORY ) )
1427 #endif
1428 
1429  reg.e.ax = 0x2; /* function code 2: cache descriptors */
1430  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
1431 
1432  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
1433  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
1434  MEMDBG
1435  ( ":\nd0: %#x %#x %#x %#x\nd1: %#x %#x %#x %#x\nd2: %#x %#x %#x %#x\nd3: %#x %#x %#x %#x\n",
1436  reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
1437  reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
1438  reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
1439  reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
1440 
1441  count = reg.descrip[0]; /* # times to repeat CPUID call. Not implemented. */
1442 
1443  /* Knights Corner at least returns 0 here */
1444  if (count==0) goto early_exit;
1445 
1446  size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) ); /* # descriptors */
1447  MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
1448  count );
1449  if (count!=1) {
1450  fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count);
1451  }
1452 
1453  for ( r = 0; r < 4; r++ ) { /* walk the registers */
1454  if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */
1455  for ( b = 3; b >= 0; b-- ) { /* walk the descriptor bytes from high to low */
1456  i = r * 4 + b; /* calculate an index into the array of descriptors */
1457  if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */
1458  if ( reg.descrip[i] == 0xff ) {
1459  MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
1460  return PAPI_ENOSUPP;
1461  /* we might continue instead */
1462  /* in order to get TLB info */
1463  /* continue; */
1464  }
1465  for ( t = 0; t < size; t++ ) { /* walk the descriptor table */
1466  if ( reg.descrip[i] == intel_cache[t].descriptor ) { /* find match */
1467  if ( intel_cache[t].level > last_level )
1468  last_level = intel_cache[t].level;
1469  intel_decode_descriptor( &intel_cache[t],
1470  mh_info->level );
1471  }
1472  }
1473  }
1474  }
1475  }
1476  }
1477 early_exit:
1478  MEMDBG( "# of Levels: %d\n", last_level );
1479  *num_levels=last_level;
1480  return PAPI_OK;
1481 }
1482 
1483 
1484 static int
1485 init_intel( PAPI_mh_info_t * mh_info, int *levels )
1486 {
1487 
1488  int result;
1489  int num_levels;
1490 
1491  /* try using the oldest leaf2 method first */
1492  result=init_intel_leaf2(mh_info, &num_levels);
1493 
1494  if (result!=PAPI_OK) {
1495  /* All Core2 and newer also support leaf4 detection */
1496  /* Starting with Westmere *only* leaf4 is supported */
1497  result=init_intel_leaf4(mh_info, &num_levels);
1498  }
1499 
1500  *levels=num_levels;
1501  return PAPI_OK;
1502 }
1503 
1504 
1505 /* Returns 1 if hypervisor detected */
1506 /* Returns 0 if none found. */
1507 int
1508 _x86_detect_hypervisor(char *vendor_name)
1509 {
1510  unsigned int eax, ebx, ecx, edx;
1511  char hyper_vendor_id[13];
1512 
1513  cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
1514  /* This is the hypervisor bit, ecx bit 31 */
1515  if (ecx&0x80000000) {
1516  /* There are various values in the 0x4000000X range */
1517  /* It is questionable how standard they are */
1518  /* For now we just return the name. */
1519  cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
1520  memcpy(hyper_vendor_id + 0, &ebx, 4);
1521  memcpy(hyper_vendor_id + 4, &ecx, 4);
1522  memcpy(hyper_vendor_id + 8, &edx, 4);
1523  hyper_vendor_id[12] = '\0';
1524  strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN);
1525  return 1;
1526  }
1527  else {
1528  strncpy(vendor_name,"none",PAPI_MAX_STR_LEN);
1529  }
1530  return 0;
1531 }
1532 
1533 
1534 
1535 
1536 
static void intel_decode_descriptor(struct _intel_cache_info *d, PAPI_mh_level_t *L)
static void print_intel_cache_table()
int levels
Definition: papi.h:771
#define PAPI_MH_TYPE_INST
Definition: papi.h:727
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
static struct _intel_cache_info intel_cache[]
#define PAPI_ENOSUPP
Definition: papi.h:269
static int init_intel(PAPI_mh_info_t *mh_info, int *levels)
#define PAPI_MH_TYPE_WB
Definition: papi.h:734
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
static int init_intel_leaf2(PAPI_mh_info_t *mh_info, int *num_levels)
int size[TLB_SIZES]
int associativity
Definition: papi.h:750
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
#define PAPI_MH_TYPE_PSEUDO_LRU
Definition: papi.h:738
#define DEBUG_MEMORY
Definition: papi_debug.h:34
#define printf
Definition: papi_test.h:125
double c
Definition: multiplex.c:22
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
#define PAPI_MH_TYPE_TRACE
Definition: papi.h:730
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
Definition: papi.h:728
#define PAPI_MH_TYPE_TLB
Definition: papi.h:740
t
Definition: iozone.c:23562
#define PAPI_MH_MAX_LEVELS
Definition: papi.h:742
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:765
int i
Definition: fileop.c:140
int _x86_cache_info(PAPI_mh_info_t *mh_info)
char *long long size
Definition: iozone.c:12023
#define TLB_SIZES
int k
Definition: iozone.c:19136
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:772
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:764
static void init_mem_hierarchy(PAPI_mh_info_t *mh_info)
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
mh for mem hierarchy maybe?
Definition: papi.h:770
#define PAPI_ENOIMPL
Definition: papi.h:270
int _x86_detect_hypervisor(char *vendor_name)
nsize_list next
Definition: iozone.c:20053
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38
int
Definition: iozone.c:18528
#define ISLEVEL(a)
Definition: papi_debug.h:54
static int init_intel_leaf4(PAPI_mh_info_t *mh_info, int *num_levels)
static short int _amd_L2_L3_assoc(unsigned short int pattern)
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:731
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:726
#define PAPI_MH_TYPE_PREF
Definition: papi.h:741
#define PAPI_MH_TYPE_WT
Definition: papi.h:733
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
static int init_amd(PAPI_mh_info_t *mh_info, int *levels)
#define PAPI_MH_CACHE_TYPE(a)
Definition: papi.h:732
int int op
Definition: iozone.c:19389
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
Definition: papi.h:743
unsigned int pattern
Definition: iozone.c:1531