PAPI  5.6.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
x86_cpuid_info.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
5 /*
6 * File: x86_cpuid_info.c
7 * Author: Dan Terpstra
8 * terpstra@eecs.utk.edu
9 * complete rewrite of linux-memory.c to conform to latest docs
10 * and convert Intel to a table driven implementation.
11 * Now also supports multiple TLB descriptors
12 */
13 
14 #include <string.h>
15 #include <stdio.h>
16 #include "papi.h"
17 #include "papi_internal.h"
18 
19 static void init_mem_hierarchy( PAPI_mh_info_t * mh_info );
20 static int init_amd( PAPI_mh_info_t * mh_info, int *levels );
21 static short int _amd_L2_L3_assoc( unsigned short int pattern );
22 static int init_intel( PAPI_mh_info_t * mh_info , int *levels);
23 
24 #if defined( __amd64__ ) || defined (__x86_64__)
/*
 * Execute the CPUID instruction (x86-64 build).
 *
 * On entry *a holds the function (leaf) code, which is loaded into EAX.
 * On return *a, *b, *c and *d receive EAX, EBX, ECX and EDX respectively.
 * ECX is not loaded by this routine before the instruction executes, so
 * it is only suitable for leaves that do not take a sub-leaf index.
 */
static inline void
cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
{
	unsigned int op = *a;

	/* __volatile__ keeps the compiler from merging or hoisting repeated
	 * CPUID queries; the other CPUID wrappers in this file already
	 * use it. */
	__asm__ __volatile__( "cpuid;"
		: "=a"( *a ), "=b"( *b ), "=c"( *c ), "=d"( *d )
		: "a"( op ) );
}
33 #else
/*
 * Execute the CPUID instruction (build for targets other than x86-64).
 *
 * On entry *a holds the function (leaf) code, which is loaded into EAX.
 * On return *a, *b, *c and *d receive EAX, EBX, ECX and EDX respectively.
 */
static inline void
cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
{
	const unsigned int leaf = *a;

	/* EBX is pushed before CPUID and popped afterwards; the value CPUID
	 * left in EBX is copied to ESI first and returned from there.
	 * .byte 0x53 == push ebx and .byte 0x5b == pop ebx. The raw opcodes
	 * are used because some gcc's (4.1.2 on Core2) object to pairing
	 * push/pop and ebx in 64 bit mode. */
	__asm__ __volatile__(
		".byte 0x53\n\t"
		"cpuid\n\t"
		"movl %%ebx, %%esi\n\t"
		".byte 0x5b"
		: "=a"( *a ), "=S"( *b ), "=c"( *c ), "=d"( *d )
		: "a"( leaf ) );
}
47 #endif
48 
49 int
51 {
52  int retval = 0;
53  union
54  {
55  struct
56  {
57  unsigned int ax, bx, cx, dx;
58  } e;
59  char vendor[20]; /* leave room for terminator bytes */
60  } reg;
61 
62  /* Don't use cpu_type to determine the processor.
63  * get the information directly from the chip.
64  */
65  reg.e.ax = 0; /* function code 0: vendor string */
66  /* The vendor string is composed of EBX:EDX:ECX.
67  * by swapping the register addresses in the call below,
68  * the string is correctly composed in the char array.
69  */
70  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.dx, &reg.e.cx );
71  reg.vendor[16] = 0;
72  MEMDBG( "Vendor: %s\n", &reg.vendor[4] );
73 
74  init_mem_hierarchy( mh_info );
75 
76  if ( !strncmp( "GenuineIntel", &reg.vendor[4], 12 ) ) {
77  init_intel( mh_info, &mh_info->levels);
78  } else if ( !strncmp( "AuthenticAMD", &reg.vendor[4], 12 ) ) {
79  init_amd( mh_info, &mh_info->levels );
80  } else {
81  MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" );
82  return PAPI_ENOIMPL;
83  }
84 
85  /* This works only because an empty cache element is initialized to 0 */
86  MEMDBG( "Detected L1: %d L2: %d L3: %d\n",
87  mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size,
88  mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size,
89  mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size );
90  return retval;
91 }
92 
93 static void
95 {
96  int i, j;
97  PAPI_mh_level_t *L = mh_info->level;
98 
99  /* initialize entire memory hierarchy structure to benign values */
100  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
101  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
102  L[i].tlb[j].type = PAPI_MH_TYPE_EMPTY;
103  L[i].tlb[j].num_entries = 0;
104  L[i].tlb[j].associativity = 0;
106  L[i].cache[j].size = 0;
107  L[i].cache[j].line_size = 0;
108  L[i].cache[j].num_lines = 0;
109  L[i].cache[j].associativity = 0;
110  }
111  }
112 }
113 
/*
 * Map a 4-bit AMD L2/L3 associativity code to a way count.
 *
 * pattern: associativity code; values 0x0 through 0xF index the table.
 * Returns the table entry for the code, where SHRT_MAX is stored for
 * code 0xF and -1 for codes the table leaves undefined. Any pattern
 * greater than 0xF also yields -1.
 */
static short int
_amd_L2_L3_assoc( unsigned short int pattern )
{
	/* From "CPUID Specification" #25481 Rev 2.28, April 2008 */
	static const short int ways_by_code[16] = {
		0, 1, 2, -1,
		4, -1, 8, -1,
		16, -1, 32, 48,
		64, 96, 128, SHRT_MAX
	};

	return ( pattern <= 0xF ) ? ways_by_code[pattern] : ( short int ) -1;
}
124 
125 /* Cache configuration for AMD Athlon/Duron */
126 static int
127 init_amd( PAPI_mh_info_t * mh_info, int *num_levels )
128 {
129  union
130  {
131  struct
132  {
133  unsigned int ax, bx, cx, dx;
134  } e;
135  unsigned char byt[16];
136  } reg;
137  int i, j, levels = 0;
138  PAPI_mh_level_t *L = mh_info->level;
139 
140  /*
141  * Layout of CPU information taken from :
142  * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info.
143  */
144 
145  MEMDBG( "Initializing AMD memory info\n" );
146  /* AMD level 1 cache info */
147  reg.e.ax = 0x80000005; /* extended function code 5: L1 Cache and TLB Identifiers */
148  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
149 
150  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
151  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
152  MEMDBG
153  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
154  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
155  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
156  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
157  reg.byt[15] );
158 
159  /* NOTE: We assume L1 cache and TLB always exists */
160  /* L1 TLB info */
161 
162  /* 4MB memory page information; half the number of entries as 2MB */
163  L[0].tlb[0].type = PAPI_MH_TYPE_INST;
164  L[0].tlb[0].num_entries = reg.byt[0] / 2;
165  L[0].tlb[0].page_size = 4096 << 10;
166  L[0].tlb[0].associativity = reg.byt[1];
167 
168  L[0].tlb[1].type = PAPI_MH_TYPE_DATA;
169  L[0].tlb[1].num_entries = reg.byt[2] / 2;
170  L[0].tlb[1].page_size = 4096 << 10;
171  L[0].tlb[1].associativity = reg.byt[3];
172 
173  /* 2MB memory page information */
174  L[0].tlb[2].type = PAPI_MH_TYPE_INST;
175  L[0].tlb[2].num_entries = reg.byt[0];
176  L[0].tlb[2].page_size = 2048 << 10;
177  L[0].tlb[2].associativity = reg.byt[1];
178 
179  L[0].tlb[3].type = PAPI_MH_TYPE_DATA;
180  L[0].tlb[3].num_entries = reg.byt[2];
181  L[0].tlb[3].page_size = 2048 << 10;
182  L[0].tlb[3].associativity = reg.byt[3];
183 
184  /* 4k page information */
185  L[0].tlb[4].type = PAPI_MH_TYPE_INST;
186  L[0].tlb[4].num_entries = reg.byt[4];
187  L[0].tlb[4].page_size = 4 << 10;
188  L[0].tlb[4].associativity = reg.byt[5];
189 
190  L[0].tlb[5].type = PAPI_MH_TYPE_DATA;
191  L[0].tlb[5].num_entries = reg.byt[6];
192  L[0].tlb[5].page_size = 4 << 10;
193  L[0].tlb[5].associativity = reg.byt[7];
194 
195  for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) {
196  if ( L[0].tlb[i].associativity == 0xff )
197  L[0].tlb[i].associativity = SHRT_MAX;
198  }
199 
200  /* L1 D-cache info */
201  L[0].cache[0].type =
203  L[0].cache[0].size = reg.byt[11] << 10;
204  L[0].cache[0].associativity = reg.byt[10];
205  L[0].cache[0].line_size = reg.byt[8];
206  /* Byt[9] is "Lines per tag" */
207  /* Is that == lines per cache? */
208  /* L[0].cache[1].num_lines = reg.byt[9]; */
209  if ( L[0].cache[0].line_size )
210  L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size;
211  MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
212  L[0].cache[0].num_lines );
213 
214  /* L1 I-cache info */
215  L[0].cache[1].type = PAPI_MH_TYPE_INST;
216  L[0].cache[1].size = reg.byt[15] << 10;
217  L[0].cache[1].associativity = reg.byt[14];
218  L[0].cache[1].line_size = reg.byt[12];
219  /* Byt[13] is "Lines per tag" */
220  /* Is that == lines per cache? */
221  /* L[0].cache[1].num_lines = reg.byt[13]; */
222  if ( L[0].cache[1].line_size )
223  L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size;
224  MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
225  L[0].cache[1].num_lines );
226 
227  for ( i = 0; i < 2; i++ ) {
228  if ( L[0].cache[i].associativity == 0xff )
229  L[0].cache[i].associativity = SHRT_MAX;
230  }
231 
232  /* AMD L2/L3 Cache and L2 TLB info */
233  /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */
234 
235  reg.e.ax = 0x80000006; /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */
236  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
237 
238  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
239  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
240  MEMDBG
241  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
242  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
243  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
244  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
245  reg.byt[15] );
246 
247  /* L2 TLB info */
248 
249  if ( reg.byt[0] | reg.byt[1] ) { /* Level 2 ITLB exists */
250  /* 4MB ITLB page information; half the number of entries as 2MB */
251  L[1].tlb[0].type = PAPI_MH_TYPE_INST;
252  L[1].tlb[0].num_entries =
253  ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
254  L[1].tlb[0].page_size = 4096 << 10;
255  L[1].tlb[0].associativity =
256  _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 );
257 
258  /* 2MB ITLB page information */
259  L[1].tlb[2].type = PAPI_MH_TYPE_INST;
260  L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2;
261  L[1].tlb[2].page_size = 2048 << 10;
262  L[1].tlb[2].associativity = L[1].tlb[0].associativity;
263  }
264 
265  if ( reg.byt[2] | reg.byt[3] ) { /* Level 2 DTLB exists */
266  /* 4MB DTLB page information; half the number of entries as 2MB */
267  L[1].tlb[1].type = PAPI_MH_TYPE_DATA;
268  L[1].tlb[1].num_entries =
269  ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
270  L[1].tlb[1].page_size = 4096 << 10;
271  L[1].tlb[1].associativity =
272  _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 );
273 
274  /* 2MB DTLB page information */
275  L[1].tlb[3].type = PAPI_MH_TYPE_DATA;
276  L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2;
277  L[1].tlb[3].page_size = 2048 << 10;
278  L[1].tlb[3].associativity = L[1].tlb[1].associativity;
279  }
280 
281  /* 4k page information */
282  if ( reg.byt[4] | reg.byt[5] ) { /* Level 2 ITLB exists */
283  L[1].tlb[4].type = PAPI_MH_TYPE_INST;
284  L[1].tlb[4].num_entries =
285  ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
286  L[1].tlb[4].page_size = 4 << 10;
287  L[1].tlb[4].associativity =
288  _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 );
289  }
290  if ( reg.byt[6] | reg.byt[7] ) { /* Level 2 DTLB exists */
291  L[1].tlb[5].type = PAPI_MH_TYPE_DATA;
292  L[1].tlb[5].num_entries =
293  ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
294  L[1].tlb[5].page_size = 4 << 10;
295  L[1].tlb[5].associativity =
296  _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 );
297  }
298 
299  /* AMD Level 2 cache info */
300  if ( reg.e.cx ) {
301  L[1].cache[0].type =
303  L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 ); /* right shift by 16; multiply by 2^10 */
304  L[1].cache[0].associativity =
305  _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 );
306  L[1].cache[0].line_size = reg.byt[8];
307 /* L[1].cache[0].num_lines = reg.byt[9]&0xF; */
308  if ( L[1].cache[0].line_size )
309  L[1].cache[0].num_lines =
310  L[1].cache[0].size / L[1].cache[0].line_size;
311  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
312  L[1].cache[0].num_lines );
313  }
314 
315  /* AMD Level 3 cache info (shared across cores) */
316  if ( reg.e.dx ) {
317  L[2].cache[0].type =
319  L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1; /* in blocks of 512KB (2^19) */
320  L[2].cache[0].associativity =
321  _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 );
322  L[2].cache[0].line_size = reg.byt[12];
323 /* L[2].cache[0].num_lines = reg.byt[13]&0xF; */
324  if ( L[2].cache[0].line_size )
325  L[2].cache[0].num_lines =
326  L[2].cache[0].size / L[2].cache[0].line_size;
327  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
328  L[1].cache[0].num_lines );
329  }
330  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
331  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
332  /* Compute the number of levels of hierarchy actually used */
333  if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY ||
334  L[i].cache[j].type != PAPI_MH_TYPE_EMPTY )
335  levels = i + 1;
336  }
337  }
338  *num_levels = levels;
339  return PAPI_OK;
340 }
341 
342  /*
343  * The data from this table now comes from figure 3-17 in
344  * the Intel Architectures Software Reference Manual 2A
345  * (cpuid instruction section)
346  *
347  * Previously the information was provided by
348  * "Intel® Processor Identification and the CPUID Instruction",
349  * Application Note, AP-485, Nov 2008, 241618-033
350  * Updated to AP-485, Aug 2009, 241618-036
351  *
352  * The following data structure and its instantiation tries to
353  * capture all the information in Section 2.1.3 of the above
354  * document. Not all of it is used by PAPI, but it could be.
355  * As the above document is revised, this table should be
356  * updated.
357  */
358 
359 #define TLB_SIZES 3 /* number of different page sizes for a single TLB descriptor */
361 {
362  int descriptor; /* 0x00 - 0xFF: register descriptor code */
363  int level; /* 1 to PAPI_MH_MAX_LEVELS */
364  int type; /* Empty, instr, data, vector, unified | TLB */
365  int size[TLB_SIZES]; /* cache or TLB page size(s) in kB */
366  int associativity; /* SHRT_MAX == fully associative */
367  int sector; /* 1 if cache is sectored; else 0 */
368  int line_size; /* for cache */
369  int entries; /* for TLB */
370 };
371 
372 static struct _intel_cache_info intel_cache[] = {
373 // 0x01
374  {.descriptor = 0x01,
375  .level = 1,
377  .size[0] = 4,
378  .associativity = 4,
379  .entries = 32,
380  },
381 // 0x02
382  {.descriptor = 0x02,
383  .level = 1,
385  .size[0] = 4096,
386  .associativity = SHRT_MAX,
387  .entries = 2,
388  },
389 // 0x03
390  {.descriptor = 0x03,
391  .level = 1,
393  .size[0] = 4,
394  .associativity = 4,
395  .entries = 64,
396  },
397 // 0x04
398  {.descriptor = 0x04,
399  .level = 1,
401  .size[0] = 4096,
402  .associativity = 4,
403  .entries = 8,
404  },
405 // 0x05
406  {.descriptor = 0x05,
407  .level = 1,
409  .size[0] = 4096,
410  .associativity = 4,
411  .entries = 32,
412  },
413 // 0x06
414  {.descriptor = 0x06,
415  .level = 1,
416  .type = PAPI_MH_TYPE_INST,
417  .size[0] = 8,
418  .associativity = 4,
419  .line_size = 32,
420  },
421 // 0x08
422  {.descriptor = 0x08,
423  .level = 1,
424  .type = PAPI_MH_TYPE_INST,
425  .size[0] = 16,
426  .associativity = 4,
427  .line_size = 32,
428  },
429 // 0x09
430  {.descriptor = 0x09,
431  .level = 1,
432  .type = PAPI_MH_TYPE_INST,
433  .size[0] = 32,
434  .associativity = 4,
435  .line_size = 64,
436  },
437 // 0x0A
438  {.descriptor = 0x0A,
439  .level = 1,
440  .type = PAPI_MH_TYPE_DATA,
441  .size[0] = 8,
442  .associativity = 2,
443  .line_size = 32,
444  },
445 // 0x0B
446  {.descriptor = 0x0B,
447  .level = 1,
449  .size[0] = 4096,
450  .associativity = 4,
451  .entries = 4,
452  },
453 // 0x0C
454  {.descriptor = 0x0C,
455  .level = 1,
456  .type = PAPI_MH_TYPE_DATA,
457  .size[0] = 16,
458  .associativity = 4,
459  .line_size = 32,
460  },
461 // 0x0D
462  {.descriptor = 0x0D,
463  .level = 1,
464  .type = PAPI_MH_TYPE_DATA,
465  .size[0] = 16,
466  .associativity = 4,
467  .line_size = 64,
468  },
469 // 0x0E
470  {.descriptor = 0x0E,
471  .level = 1,
472  .type = PAPI_MH_TYPE_DATA,
473  .size[0] = 24,
474  .associativity = 6,
475  .line_size = 64,
476  },
477 // 0x21
478  {.descriptor = 0x21,
479  .level = 2,
480  .type = PAPI_MH_TYPE_UNIFIED,
481  .size[0] = 256,
482  .associativity = 8,
483  .line_size = 64,
484  },
485 // 0x22
486  {.descriptor = 0x22,
487  .level = 3,
488  .type = PAPI_MH_TYPE_UNIFIED,
489  .size[0] = 512,
490  .associativity = 4,
491  .sector = 1,
492  .line_size = 64,
493  },
494 // 0x23
495  {.descriptor = 0x23,
496  .level = 3,
497  .type = PAPI_MH_TYPE_UNIFIED,
498  .size[0] = 1024,
499  .associativity = 8,
500  .sector = 1,
501  .line_size = 64,
502  },
503 // 0x25
504  {.descriptor = 0x25,
505  .level = 3,
506  .type = PAPI_MH_TYPE_UNIFIED,
507  .size[0] = 2048,
508  .associativity = 8,
509  .sector = 1,
510  .line_size = 64,
511  },
512 // 0x29
513  {.descriptor = 0x29,
514  .level = 3,
515  .type = PAPI_MH_TYPE_UNIFIED,
516  .size[0] = 4096,
517  .associativity = 8,
518  .sector = 1,
519  .line_size = 64,
520  },
521 // 0x2C
522  {.descriptor = 0x2C,
523  .level = 1,
524  .type = PAPI_MH_TYPE_DATA,
525  .size[0] = 32,
526  .associativity = 8,
527  .line_size = 64,
528  },
529 // 0x30
530  {.descriptor = 0x30,
531  .level = 1,
532  .type = PAPI_MH_TYPE_INST,
533  .size[0] = 32,
534  .associativity = 8,
535  .line_size = 64,
536  },
537 // 0x39
538  {.descriptor = 0x39,
539  .level = 2,
540  .type = PAPI_MH_TYPE_UNIFIED,
541  .size[0] = 128,
542  .associativity = 4,
543  .sector = 1,
544  .line_size = 64,
545  },
546 // 0x3A
547  {.descriptor = 0x3A,
548  .level = 2,
549  .type = PAPI_MH_TYPE_UNIFIED,
550  .size[0] = 192,
551  .associativity = 6,
552  .sector = 1,
553  .line_size = 64,
554  },
555 // 0x3B
556  {.descriptor = 0x3B,
557  .level = 2,
558  .type = PAPI_MH_TYPE_UNIFIED,
559  .size[0] = 128,
560  .associativity = 2,
561  .sector = 1,
562  .line_size = 64,
563  },
564 // 0x3C
565  {.descriptor = 0x3C,
566  .level = 2,
567  .type = PAPI_MH_TYPE_UNIFIED,
568  .size[0] = 256,
569  .associativity = 4,
570  .sector = 1,
571  .line_size = 64,
572  },
573 // 0x3D
574  {.descriptor = 0x3D,
575  .level = 2,
576  .type = PAPI_MH_TYPE_UNIFIED,
577  .size[0] = 384,
578  .associativity = 6,
579  .sector = 1,
580  .line_size = 64,
581  },
582 // 0x3E
583  {.descriptor = 0x3E,
584  .level = 2,
585  .type = PAPI_MH_TYPE_UNIFIED,
586  .size[0] = 512,
587  .associativity = 4,
588  .sector = 1,
589  .line_size = 64,
590  },
591 // 0x40: no last level cache (??)
592 // 0x41
593  {.descriptor = 0x41,
594  .level = 2,
595  .type = PAPI_MH_TYPE_UNIFIED,
596  .size[0] = 128,
597  .associativity = 4,
598  .line_size = 32,
599  },
600 // 0x42
601  {.descriptor = 0x42,
602  .level = 2,
603  .type = PAPI_MH_TYPE_UNIFIED,
604  .size[0] = 256,
605  .associativity = 4,
606  .line_size = 32,
607  },
608 // 0x43
609  {.descriptor = 0x43,
610  .level = 2,
611  .type = PAPI_MH_TYPE_UNIFIED,
612  .size[0] = 512,
613  .associativity = 4,
614  .line_size = 32,
615  },
616 // 0x44
617  {.descriptor = 0x44,
618  .level = 2,
619  .type = PAPI_MH_TYPE_UNIFIED,
620  .size[0] = 1024,
621  .associativity = 4,
622  .line_size = 32,
623  },
624 // 0x45
625  {.descriptor = 0x45,
626  .level = 2,
627  .type = PAPI_MH_TYPE_UNIFIED,
628  .size[0] = 2048,
629  .associativity = 4,
630  .line_size = 32,
631  },
632 // 0x46
633  {.descriptor = 0x46,
634  .level = 3,
635  .type = PAPI_MH_TYPE_UNIFIED,
636  .size[0] = 4096,
637  .associativity = 4,
638  .line_size = 64,
639  },
640 // 0x47
641  {.descriptor = 0x47,
642  .level = 3,
643  .type = PAPI_MH_TYPE_UNIFIED,
644  .size[0] = 8192,
645  .associativity = 8,
646  .line_size = 64,
647  },
648 // 0x48
649  {.descriptor = 0x48,
650  .level = 2,
651  .type = PAPI_MH_TYPE_UNIFIED,
652  .size[0] = 3072,
653  .associativity = 12,
654  .line_size = 64,
655  },
656 // 0x49 NOTE: for family 0x0F model 0x06 this is level 3
657  {.descriptor = 0x49,
658  .level = 2,
659  .type = PAPI_MH_TYPE_UNIFIED,
660  .size[0] = 4096,
661  .associativity = 16,
662  .line_size = 64,
663  },
664 // 0x4A
665  {.descriptor = 0x4A,
666  .level = 3,
667  .type = PAPI_MH_TYPE_UNIFIED,
668  .size[0] = 6144,
669  .associativity = 12,
670  .line_size = 64,
671  },
672 // 0x4B
673  {.descriptor = 0x4B,
674  .level = 3,
675  .type = PAPI_MH_TYPE_UNIFIED,
676  .size[0] = 8192,
677  .associativity = 16,
678  .line_size = 64,
679  },
680 // 0x4C
681  {.descriptor = 0x4C,
682  .level = 3,
683  .type = PAPI_MH_TYPE_UNIFIED,
684  .size[0] = 12288,
685  .associativity = 12,
686  .line_size = 64,
687  },
688 // 0x4D
689  {.descriptor = 0x4D,
690  .level = 3,
691  .type = PAPI_MH_TYPE_UNIFIED,
692  .size[0] = 16384,
693  .associativity = 16,
694  .line_size = 64,
695  },
696 // 0x4E
697  {.descriptor = 0x4E,
698  .level = 2,
699  .type = PAPI_MH_TYPE_UNIFIED,
700  .size[0] = 6144,
701  .associativity = 24,
702  .line_size = 64,
703  },
704 // 0x4F
705  {.descriptor = 0x4F,
706  .level = 1,
708  .size[0] = 4,
709  .associativity = SHRT_MAX,
710  .entries = 32,
711  },
712 // 0x50
713  {.descriptor = 0x50,
714  .level = 1,
716  .size = {4, 2048, 4096},
717  .associativity = SHRT_MAX,
718  .entries = 64,
719  },
720 // 0x51
721  {.descriptor = 0x51,
722  .level = 1,
724  .size = {4, 2048, 4096},
725  .associativity = SHRT_MAX,
726  .entries = 128,
727  },
728 // 0x52
729  {.descriptor = 0x52,
730  .level = 1,
732  .size = {4, 2048, 4096},
733  .associativity = SHRT_MAX,
734  .entries = 256,
735  },
736 // 0x55
737  {.descriptor = 0x55,
738  .level = 1,
740  .size = {2048, 4096, 0},
741  .associativity = SHRT_MAX,
742  .entries = 7,
743  },
744 // 0x56
745  {.descriptor = 0x56,
746  .level = 1,
748  .size[0] = 4096,
749  .associativity = 4,
750  .entries = 16,
751  },
752 // 0x57
753  {.descriptor = 0x57,
754  .level = 1,
756  .size[0] = 4,
757  .associativity = 4,
758  .entries = 16,
759  },
760 // 0x59
761  {.descriptor = 0x59,
762  .level = 1,
764  .size[0] = 4,
765  .associativity = SHRT_MAX,
766  .entries = 16,
767  },
768 // 0x5A
769  {.descriptor = 0x5A,
770  .level = 1,
772  .size = {2048, 4096, 0},
773  .associativity = 4,
774  .entries = 32,
775  },
776 // 0x5B
777  {.descriptor = 0x5B,
778  .level = 1,
780  .size = {4, 4096, 0},
781  .associativity = SHRT_MAX,
782  .entries = 64,
783  },
784 // 0x5C
785  {.descriptor = 0x5C,
786  .level = 1,
788  .size = {4, 4096, 0},
789  .associativity = SHRT_MAX,
790  .entries = 128,
791  },
792 // 0x5D
793  {.descriptor = 0x5D,
794  .level = 1,
796  .size = {4, 4096, 0},
797  .associativity = SHRT_MAX,
798  .entries = 256,
799  },
800 // 0x60
801  {.descriptor = 0x60,
802  .level = 1,
803  .type = PAPI_MH_TYPE_DATA,
804  .size[0] = 16,
805  .associativity = 8,
806  .sector = 1,
807  .line_size = 64,
808  },
809 // 0x66
810  {.descriptor = 0x66,
811  .level = 1,
812  .type = PAPI_MH_TYPE_DATA,
813  .size[0] = 8,
814  .associativity = 4,
815  .sector = 1,
816  .line_size = 64,
817  },
818 // 0x67
819  {.descriptor = 0x67,
820  .level = 1,
821  .type = PAPI_MH_TYPE_DATA,
822  .size[0] = 16,
823  .associativity = 4,
824  .sector = 1,
825  .line_size = 64,
826  },
827 // 0x68
828  {.descriptor = 0x68,
829  .level = 1,
830  .type = PAPI_MH_TYPE_DATA,
831  .size[0] = 32,
832  .associativity = 4,
833  .sector = 1,
834  .line_size = 64,
835  },
836 // 0x70
837  {.descriptor = 0x70,
838  .level = 1,
839  .type = PAPI_MH_TYPE_TRACE,
840  .size[0] = 12,
841  .associativity = 8,
842  },
843 // 0x71
844  {.descriptor = 0x71,
845  .level = 1,
846  .type = PAPI_MH_TYPE_TRACE,
847  .size[0] = 16,
848  .associativity = 8,
849  },
850 // 0x72
851  {.descriptor = 0x72,
852  .level = 1,
853  .type = PAPI_MH_TYPE_TRACE,
854  .size[0] = 32,
855  .associativity = 8,
856  },
857 // 0x73
858  {.descriptor = 0x73,
859  .level = 1,
860  .type = PAPI_MH_TYPE_TRACE,
861  .size[0] = 64,
862  .associativity = 8,
863  },
864 // 0x78
865  {.descriptor = 0x78,
866  .level = 2,
867  .type = PAPI_MH_TYPE_UNIFIED,
868  .size[0] = 1024,
869  .associativity = 4,
870  .line_size = 64,
871  },
872 // 0x79
873  {.descriptor = 0x79,
874  .level = 2,
875  .type = PAPI_MH_TYPE_UNIFIED,
876  .size[0] = 128,
877  .associativity = 8,
878  .sector = 1,
879  .line_size = 64,
880  },
881 // 0x7A
882  {.descriptor = 0x7A,
883  .level = 2,
884  .type = PAPI_MH_TYPE_UNIFIED,
885  .size[0] = 256,
886  .associativity = 8,
887  .sector = 1,
888  .line_size = 64,
889  },
890 // 0x7B
891  {.descriptor = 0x7B,
892  .level = 2,
893  .type = PAPI_MH_TYPE_UNIFIED,
894  .size[0] = 512,
895  .associativity = 8,
896  .sector = 1,
897  .line_size = 64,
898  },
899 // 0x7C
900  {.descriptor = 0x7C,
901  .level = 2,
902  .type = PAPI_MH_TYPE_UNIFIED,
903  .size[0] = 1024,
904  .associativity = 8,
905  .sector = 1,
906  .line_size = 64,
907  },
908 // 0x7D
909  {.descriptor = 0x7D,
910  .level = 2,
911  .type = PAPI_MH_TYPE_UNIFIED,
912  .size[0] = 2048,
913  .associativity = 8,
914  .line_size = 64,
915  },
916 // 0x7F
917  {.descriptor = 0x7F,
918  .level = 2,
919  .type = PAPI_MH_TYPE_UNIFIED,
920  .size[0] = 512,
921  .associativity = 2,
922  .line_size = 64,
923  },
924 // 0x80
925  {.descriptor = 0x80,
926  .level = 2,
927  .type = PAPI_MH_TYPE_UNIFIED,
928  .size[0] = 512,
929  .associativity = 8,
930  .line_size = 64,
931  },
932 // 0x82
933  {.descriptor = 0x82,
934  .level = 2,
935  .type = PAPI_MH_TYPE_UNIFIED,
936  .size[0] = 256,
937  .associativity = 8,
938  .line_size = 32,
939  },
940 // 0x83
941  {.descriptor = 0x83,
942  .level = 2,
943  .type = PAPI_MH_TYPE_UNIFIED,
944  .size[0] = 512,
945  .associativity = 8,
946  .line_size = 32,
947  },
948 // 0x84
949  {.descriptor = 0x84,
950  .level = 2,
951  .type = PAPI_MH_TYPE_UNIFIED,
952  .size[0] = 1024,
953  .associativity = 8,
954  .line_size = 32,
955  },
956 // 0x85
957  {.descriptor = 0x85,
958  .level = 2,
959  .type = PAPI_MH_TYPE_UNIFIED,
960  .size[0] = 2048,
961  .associativity = 8,
962  .line_size = 32,
963  },
964 // 0x86
965  {.descriptor = 0x86,
966  .level = 2,
967  .type = PAPI_MH_TYPE_UNIFIED,
968  .size[0] = 512,
969  .associativity = 4,
970  .line_size = 64,
971  },
972 // 0x87
973  {.descriptor = 0x87,
974  .level = 2,
975  .type = PAPI_MH_TYPE_UNIFIED,
976  .size[0] = 1024,
977  .associativity = 8,
978  .line_size = 64,
979  },
980 // 0xB0
981  {.descriptor = 0xB0,
982  .level = 1,
984  .size[0] = 4,
985  .associativity = 4,
986  .entries = 128,
987  },
988 // 0xB1 NOTE: This is currently the only instance where .entries
989 // is dependent on .size. It's handled as a code exception.
990 // If other instances appear in the future, the structure
991 // should probably change to accommodate it.
992  {.descriptor = 0xB1,
993  .level = 1,
995  .size = {2048, 4096, 0},
996  .associativity = 4,
997  .entries = 8, /* or 4 if size = 4096 */
998  },
999 // 0xB2
1000  {.descriptor = 0xB2,
1001  .level = 1,
1003  .size[0] = 4,
1004  .associativity = 4,
1005  .entries = 64,
1006  },
1007 // 0xB3
1008  {.descriptor = 0xB3,
1009  .level = 1,
1011  .size[0] = 4,
1012  .associativity = 4,
1013  .entries = 128,
1014  },
1015 // 0xB4
1016  {.descriptor = 0xB4,
1017  .level = 1,
1019  .size[0] = 4,
1020  .associativity = 4,
1021  .entries = 256,
1022  },
1023 // 0xBA
1024  {.descriptor = 0xBA,
1025  .level = 1,
1027  .size[0] = 4,
1028  .associativity = 4,
1029  .entries = 64,
1030  },
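1031 // 0xC0 -- NOTE(review): .descriptor below reads 0xBA (same as the previous entry); it presumably should be 0xC0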
1032  {.descriptor = 0xBA,
1033  .level = 1,
1035  .size = {4,4096},
1036  .associativity = 4,
1037  .entries = 8,
1038  },
1039 // 0xCA
1040  {.descriptor = 0xCA,
1041  .level = 2,
1043  .size[0] = 4,
1044  .associativity = 4,
1045  .entries = 512,
1046  },
1047 // 0xD0
1048  {.descriptor = 0xD0,
1049  .level = 3,
1050  .type = PAPI_MH_TYPE_UNIFIED,
1051  .size[0] = 512,
1052  .associativity = 4,
1053  .line_size = 64,
1054  },
1055 // 0xD1
1056  {.descriptor = 0xD1,
1057  .level = 3,
1058  .type = PAPI_MH_TYPE_UNIFIED,
1059  .size[0] = 1024,
1060  .associativity = 4,
1061  .line_size = 64,
1062  },
1063 // 0xD2
1064  {.descriptor = 0xD2,
1065  .level = 3,
1066  .type = PAPI_MH_TYPE_UNIFIED,
1067  .size[0] = 2048,
1068  .associativity = 4,
1069  .line_size = 64,
1070  },
1071 // 0xD6
1072  {.descriptor = 0xD6,
1073  .level = 3,
1074  .type = PAPI_MH_TYPE_UNIFIED,
1075  .size[0] = 1024,
1076  .associativity = 8,
1077  .line_size = 64,
1078  },
1079 // 0xD7
1080  {.descriptor = 0xD7,
1081  .level = 3,
1082  .type = PAPI_MH_TYPE_UNIFIED,
1083  .size[0] = 2048,
1084  .associativity = 8,
1085  .line_size = 64,
1086  },
1087 // 0xD8
1088  {.descriptor = 0xD8,
1089  .level = 3,
1090  .type = PAPI_MH_TYPE_UNIFIED,
1091  .size[0] = 4096,
1092  .associativity = 8,
1093  .line_size = 64,
1094  },
1095 // 0xDC
1096  {.descriptor = 0xDC,
1097  .level = 3,
1098  .type = PAPI_MH_TYPE_UNIFIED,
1099  .size[0] = 1536,
1100  .associativity = 12,
1101  .line_size = 64,
1102  },
1103 // 0xDD
1104  {.descriptor = 0xDD,
1105  .level = 3,
1106  .type = PAPI_MH_TYPE_UNIFIED,
1107  .size[0] = 3072,
1108  .associativity = 12,
1109  .line_size = 64,
1110  },
1111 // 0xDE
1112  {.descriptor = 0xDE,
1113  .level = 3,
1114  .type = PAPI_MH_TYPE_UNIFIED,
1115  .size[0] = 6144,
1116  .associativity = 12,
1117  .line_size = 64,
1118  },
1119 // 0xE2
1120  {.descriptor = 0xE2,
1121  .level = 3,
1122  .type = PAPI_MH_TYPE_UNIFIED,
1123  .size[0] = 2048,
1124  .associativity = 16,
1125  .line_size = 64,
1126  },
1127 // 0xE3
1128  {.descriptor = 0xE3,
1129  .level = 3,
1130  .type = PAPI_MH_TYPE_UNIFIED,
1131  .size[0] = 4096,
1132  .associativity = 16,
1133  .line_size = 64,
1134  },
1135 // 0xE4
1136  {.descriptor = 0xE4,
1137  .level = 3,
1138  .type = PAPI_MH_TYPE_UNIFIED,
1139  .size[0] = 8192,
1140  .associativity = 16,
1141  .line_size = 64,
1142  },
1143 // 0xEA
1144  {.descriptor = 0xEA,
1145  .level = 3,
1146  .type = PAPI_MH_TYPE_UNIFIED,
1147  .size[0] = 12288,
1148  .associativity = 24,
1149  .line_size = 64,
1150  },
1151 // 0xEB
1152  {.descriptor = 0xEB,
1153  .level = 3,
1154  .type = PAPI_MH_TYPE_UNIFIED,
1155  .size[0] = 18432,
1156  .associativity = 24,
1157  .line_size = 64,
1158  },
1159 // 0xEC
1160  {.descriptor = 0xEC,
1161  .level = 3,
1162  .type = PAPI_MH_TYPE_UNIFIED,
1163  .size[0] = 24576,
1164  .associativity = 24,
1165  .line_size = 64,
1166  },
1167 // 0xF0
1168  {.descriptor = 0xF0,
1169  .level = 1,
1170  .type = PAPI_MH_TYPE_PREF,
1171  .size[0] = 64,
1172  },
1173 // 0xF1
1174  {.descriptor = 0xF1,
1175  .level = 1,
1176  .type = PAPI_MH_TYPE_PREF,
1177  .size[0] = 128,
1178  },
1179 };
1180 
1181 #ifdef DEBUG
1182 static void
1184 {
1185  int i, j, k =
1186  ( int ) ( sizeof ( intel_cache ) /
1187  sizeof ( struct _intel_cache_info ) );
1188  for ( i = 0; i < k; i++ ) {
1189  printf( "%d.\tDescriptor: %#x\n", i, intel_cache[i].descriptor );
1190  printf( "\t Level: %d\n", intel_cache[i].level );
1191  printf( "\t Type: %d\n", intel_cache[i].type );
1192  printf( "\t Size(s): " );
1193  for ( j = 0; j < TLB_SIZES; j++ )
1194  printf( "%d, ", intel_cache[i].size[j] );
1195  printf( "\n" );
1196  printf( "\t Assoc: %d\n", intel_cache[i].associativity );
1197  printf( "\t Sector: %d\n", intel_cache[i].sector );
1198  printf( "\t Line Size: %d\n", intel_cache[i].line_size );
1199  printf( "\t Entries: %d\n", intel_cache[i].entries );
1200  printf( "\n" );
1201  }
1202 }
1203 #endif
1204 
1205 /* Given a specific cache descriptor, this routine decodes the information from a table
1206  * of such descriptors and fills out one or more records in a PAPI data structure.
1207  * Called only by init_intel()
1208  */
1209 static void
1211 {
1212  int i, next;
1213  int level = d->level - 1;
1216 
1217  if ( d->descriptor == 0x49 ) { /* special case */
1218  unsigned int r_eax, r_ebx, r_ecx, r_edx;
1219  r_eax = 0x1; /* function code 1: family & model */
1220  cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
1221  /* override table for Family F, model 6 only */
1222  if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
1223  level = 3;
1224  }
1225  if ( d->type & PAPI_MH_TYPE_TLB ) {
1226  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1227  if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY )
1228  break;
1229  }
1230  /* expand TLB entries for multiple possible page sizes */
1231  for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i];
1232  i++, next++ ) {
1233 // printf("Level %d Descriptor: %#x TLB type %#x next: %d, i: %d\n", level, d->descriptor, d->type, next, i);
1234  t = &L[level].tlb[next];
1235  t->type = PAPI_MH_CACHE_TYPE( d->type );
1236  t->num_entries = d->entries;
1237  t->page_size = d->size[i] << 10; /* minimum page size in KB */
1238  t->associativity = d->associativity;
1239  /* another special case */
1240  if ( d->descriptor == 0xB1 && d->size[i] == 4096 )
1241  t->num_entries = d->entries / 2;
1242  }
1243  } else {
1244  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1245  if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY )
1246  break;
1247  }
1248 // printf("Level %d Descriptor: %#x Cache type %#x next: %d\n", level, d->descriptor, d->type, next);
1249  c = &L[level].cache[next];
1250  c->type = PAPI_MH_CACHE_TYPE( d->type );
1251  c->size = d->size[0] << 10; /* convert from KB to bytes */
1252  c->associativity = d->associativity;
1253  if ( d->line_size ) {
1254  c->line_size = d->line_size;
1255  c->num_lines = c->size / c->line_size;
1256  }
1257  }
1258 }
1259 
/* cpuid2: execute the CPUID instruction with EAX = index and ECX = ecx_in
 * and hand the four result registers back through eax/ebx/ecx/edx.
 * Unlike cpuid(), the ECX input is set explicitly.
 */
#if defined(__amd64__) || defined(__x86_64__)
static inline void
cpuid2( unsigned int *eax, unsigned int *ebx,
	unsigned int *ecx, unsigned int *edx,
	unsigned int index, unsigned int ecx_in )
{
	unsigned int out_a, out_b, out_c, out_d;

	/* inputs are tied to the eax (operand 0) and ecx (operand 2) outputs */
	__asm__ __volatile__( "cpuid;"
			      : "=a"( out_a ), "=b"( out_b ),
				"=c"( out_c ), "=d"( out_d )
			      : "0"( index ), "2"( ecx_in ) );

	*eax = out_a;
	*ebx = out_b;
	*ecx = out_c;
	*edx = out_d;
}
#else
static inline void
cpuid2( unsigned int *eax, unsigned int *ebx,
	unsigned int *ecx, unsigned int *edx,
	unsigned int index, unsigned int ecx_in )
{
	unsigned int out_a, out_b, out_c, out_d;

	/* ebx is saved and restored by hand around the instruction and its
	 * result is returned through esi instead; the push/pop are emitted
	 * as raw opcode bytes, as in cpuid().
	 */
	__asm__ __volatile__( ".byte 0x53\n\t"		/* push ebx */
			      "cpuid\n\t"
			      "movl %%ebx, %%esi\n\t"
			      ".byte 0x5b"		/* pop ebx */
			      : "=a"( out_a ), "=S"( out_b ),
				"=c"( out_c ), "=d"( out_d )
			      : "0"( index ), "2"( ecx_in ) );

	*eax = out_a;
	*ebx = out_b;
	*ecx = out_c;
	*edx = out_d;
}
#endif
1283 
1284 
1285 static int
1286 init_intel_leaf4( PAPI_mh_info_t * mh_info, int *num_levels )
1287 {
1288 
1289  unsigned int eax, ebx, ecx, edx;
1290  unsigned int maxidx, ecx_in;
1291  int next;
1292 
1293  int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
1294  int cache_linesize,cache_partitions,cache_ways,cache_sets;
1295 
1297 
1298  *num_levels=0;
1299 
1300  cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
1301  maxidx = eax;
1302 
1303  if (maxidx<4) {
1304  MEMDBG("Warning! CPUID Index 4 not supported!\n");
1305  return PAPI_ENOSUPP;
1306  }
1307 
1308  ecx_in=0;
1309  while(1) {
1310  cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
1311 
1312 
1313 
1314  /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */
1315 
1316  cache_type=eax&0x1f;
1317  if (cache_type==0) break;
1318 
1319  cache_level=(eax>>5)&0x3;
1320  cache_selfinit=(eax>>8)&0x1;
1321  cache_fullyassoc=(eax>>9)&0x1;
1322 
1323  cache_linesize=(ebx&0xfff)+1;
1324  cache_partitions=((ebx>>12)&0x3ff)+1;
1325  cache_ways=((ebx>>22)&0x3ff)+1;
1326 
1327  cache_sets=(ecx)+1;
1328 
1329  /* should we export this info?
1330 
1331  cache_maxshare=((eax>>14)&0xfff)+1;
1332  cache_maxpackage=((eax>>26)&0x3f)+1;
1333 
1334  cache_wb=(edx)&1;
1335  cache_inclusive=(edx>>1)&1;
1336  cache_indexing=(edx>>2)&1;
1337  */
1338 
1339  if (cache_level>*num_levels) *num_levels=cache_level;
1340 
1341  /* find next slot available to hold cache info */
1342  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1343  if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break;
1344  }
1345 
1346  c=&(mh_info->level[cache_level-1].cache[next]);
1347 
1348  switch(cache_type) {
1349  case 1: MEMDBG("L%d Data Cache\n",cache_level);
1351  break;
1352  case 2: MEMDBG("L%d Instruction Cache\n",cache_level);
1354  break;
1355  case 3: MEMDBG("L%d Unified Cache\n",cache_level);
1357  break;
1358  }
1359 
1360  if (cache_selfinit) { MEMDBG("\tSelf-init\n"); }
1361  if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); }
1362 
1363  //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare);
1364  //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage);
1365 
1366  MEMDBG("\tCache linesize: %d\n",cache_linesize);
1367 
1368  MEMDBG("\tCache partitions: %d\n",cache_partitions);
1369  MEMDBG("\tCache associaticity: %d\n",cache_ways);
1370 
1371  MEMDBG("\tCache sets: %d\n",cache_sets);
1372  MEMDBG("\tCache size = %dkB\n",
1373  (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
1374 
1375  //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb);
1376  //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive);
1377  //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing);
1378 
1379  c->line_size=cache_linesize;
1380  if (cache_fullyassoc) {
1381  c->associativity=SHRT_MAX;
1382  }
1383  else {
1384  c->associativity=cache_ways;
1385  }
1386  c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
1387  c->num_lines=cache_ways*cache_partitions*cache_sets;
1388 
1389  ecx_in++;
1390  }
1391  return PAPI_OK;
1392 }
1393 
1394 static int
1395 init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels)
1396 {
1397  /* cpuid() returns memory copies of 4 32-bit registers
1398  * this union allows them to be accessed as either registers
1399  * or individual bytes. Remember that Intel is little-endian.
1400  */
1401  union
1402  {
1403  struct
1404  {
1405  unsigned int ax, bx, cx, dx;
1406  } e;
1407  unsigned char descrip[16];
1408  } reg;
1409 
1410  int r; /* register boundary index */
1411  int b; /* byte index into a register */
1412  int i; /* byte index into the descrip array */
1413  int t; /* table index into the static descriptor table */
1414  int count; /* how many times to call cpuid; from eax:lsb */
1415  int size; /* size of the descriptor table */
1416  int last_level = 0; /* how many levels in the cache hierarchy */
1417 
1418  /* All of Intel's cache info is in 1 call to cpuid
1419  * however it is a table lookup :(
1420  */
1421  MEMDBG( "Initializing Intel Cache and TLB descriptors\n" );
1422 
1423 #ifdef DEBUG
1424  if ( ISLEVEL( DEBUG_MEMORY ) )
1426 #endif
1427 
1428  reg.e.ax = 0x2; /* function code 2: cache descriptors */
1429  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
1430 
1431  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
1432  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
1433  MEMDBG
1434  ( ":\nd0: %#x %#x %#x %#x\nd1: %#x %#x %#x %#x\nd2: %#x %#x %#x %#x\nd3: %#x %#x %#x %#x\n",
1435  reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
1436  reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
1437  reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
1438  reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
1439 
1440  count = reg.descrip[0]; /* # times to repeat CPUID call. Not implemented. */
1441 
1442  /* Knights Corner at least returns 0 here */
1443  if (count==0) goto early_exit;
1444 
1445  size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) ); /* # descriptors */
1446  MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
1447  count );
1448  if (count!=1) {
1449  fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count);
1450  }
1451 
1452  for ( r = 0; r < 4; r++ ) { /* walk the registers */
1453  if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */
1454  for ( b = 3; b >= 0; b-- ) { /* walk the descriptor bytes from high to low */
1455  i = r * 4 + b; /* calculate an index into the array of descriptors */
1456  if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */
1457  if ( reg.descrip[i] == 0xff ) {
1458  MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
1459  return PAPI_ENOSUPP;
1460  /* we might continue instead */
1461  /* in order to get TLB info */
1462  /* continue; */
1463  }
1464  for ( t = 0; t < size; t++ ) { /* walk the descriptor table */
1465  if ( reg.descrip[i] == intel_cache[t].descriptor ) { /* find match */
1466  if ( intel_cache[t].level > last_level )
1467  last_level = intel_cache[t].level;
1468  intel_decode_descriptor( &intel_cache[t],
1469  mh_info->level );
1470  }
1471  }
1472  }
1473  }
1474  }
1475  }
1476 early_exit:
1477  MEMDBG( "# of Levels: %d\n", last_level );
1478  *num_levels=last_level;
1479  return PAPI_OK;
1480 }
1481 
1482 
1483 static int
1484 init_intel( PAPI_mh_info_t * mh_info, int *levels )
1485 {
1486 
1487  int result;
1488  int num_levels;
1489 
1490  /* try using the oldest leaf2 method first */
1491  result=init_intel_leaf2(mh_info, &num_levels);
1492 
1493  if (result!=PAPI_OK) {
1494  /* All Core2 and newer also support leaf4 detection */
1495  /* Starting with Westmere *only* leaf4 is supported */
1496  result=init_intel_leaf4(mh_info, &num_levels);
1497  }
1498 
1499  *levels=num_levels;
1500  return PAPI_OK;
1501 }
1502 
1503 
1504 /* Returns 1 if hypervisor detected */
1505 /* Returns 0 if none found. */
1506 int
1507 _x86_detect_hypervisor(char *vendor_name)
1508 {
1509  unsigned int eax, ebx, ecx, edx;
1510  char hyper_vendor_id[13];
1511 
1512  cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
1513  /* This is the hypervisor bit, ecx bit 31 */
1514  if (ecx&0x80000000) {
1515  /* There are various values in the 0x4000000X range */
1516  /* It is questionable how standard they are */
1517  /* For now we just return the name. */
1518  cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
1519  memcpy(hyper_vendor_id + 0, &ebx, 4);
1520  memcpy(hyper_vendor_id + 4, &ecx, 4);
1521  memcpy(hyper_vendor_id + 8, &edx, 4);
1522  hyper_vendor_id[12] = '\0';
1523  strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN);
1524  return 1;
1525  }
1526  else {
1527  strncpy(vendor_name,"none",PAPI_MAX_STR_LEN);
1528  }
1529  return 0;
1530 }
static void intel_decode_descriptor(struct _intel_cache_info *d, PAPI_mh_level_t *L)
static void print_intel_cache_table()
int levels
Definition: papi.h:774
#define PAPI_MH_TYPE_INST
Definition: papi.h:730
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
static struct _intel_cache_info intel_cache[]
#define PAPI_ENOSUPP
Definition: papi.h:271
static int init_intel(PAPI_mh_info_t *mh_info, int *levels)
#define PAPI_MH_TYPE_WB
Definition: papi.h:737
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
static int init_intel_leaf2(PAPI_mh_info_t *mh_info, int *num_levels)
int size[TLB_SIZES]
int associativity
Definition: papi.h:753
return PAPI_OK
Definition: linux-nvml.c:497
int count
Definition: iozone.c:22422
#define PAPI_MH_TYPE_PSEUDO_LRU
Definition: papi.h:741
#define DEBUG_MEMORY
Definition: papi_debug.h:34
double c
Definition: multiplex.c:22
#define PAPI_MH_TYPE_TRACE
Definition: papi.h:733
Return codes and api definitions.
#define PAPI_MH_TYPE_DATA
Definition: papi.h:731
#define PAPI_MH_TYPE_TLB
Definition: papi.h:743
t
Definition: iozone.c:23562
#define PAPI_MH_MAX_LEVELS
Definition: papi.h:745
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:768
int i
Definition: fileop.c:140
int _x86_cache_info(PAPI_mh_info_t *mh_info)
char *long long size
Definition: iozone.c:12023
#define TLB_SIZES
int k
Definition: iozone.c:19136
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:775
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:767
static void init_mem_hierarchy(PAPI_mh_info_t *mh_info)
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
mh for mem hierarchy maybe?
Definition: papi.h:773
#define PAPI_ENOIMPL
Definition: papi.h:272
int _x86_detect_hypervisor(char *vendor_name)
nsize_list next
Definition: iozone.c:20053
printf("\tTry: -i 0 -i 1 \n\n")
int
Definition: iozone.c:18528
#define ISLEVEL(a)
Definition: papi_debug.h:54
static int init_intel_leaf4(PAPI_mh_info_t *mh_info, int *num_levels)
static short int _amd_L2_L3_assoc(unsigned short int pattern)
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:734
#define PAPI_MAX_STR_LEN
Definition: papi.h:465
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:729
#define PAPI_MH_TYPE_PREF
Definition: papi.h:744
#define PAPI_MH_TYPE_WT
Definition: papi.h:736
long j
Definition: iozone.c:19135
ssize_t retval
Definition: libasync.c:338
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:39
static int init_amd(PAPI_mh_info_t *mh_info, int *levels)
#define PAPI_MH_CACHE_TYPE(a)
Definition: papi.h:735
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:38
int int op
Definition: iozone.c:19389
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
Definition: papi.h:746
unsigned int pattern
Definition: iozone.c:1531