PAPI  5.3.2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
x86_cpuid_info.c File Reference
Include dependency graph for x86_cpuid_info.c:

Go to the source code of this file.

Data Structures

struct  _intel_cache_info
 

Macros

#define TLB_SIZES   3 /* number of different page sizes for a single TLB descriptor */
 

Functions

static void init_mem_hierarchy (PAPI_mh_info_t *mh_info)
 
static int init_amd (PAPI_mh_info_t *mh_info, int *levels)
 
static short int _amd_L2_L3_assoc (unsigned short int pattern)
 
static int init_intel (PAPI_mh_info_t *mh_info, int *levels)
 
static void cpuid (unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
 
int _x86_cache_info (PAPI_mh_info_t *mh_info)
 
static void print_intel_cache_table ()
 
static void intel_decode_descriptor (struct _intel_cache_info *d, PAPI_mh_level_t *L)
 
static void cpuid2 (unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
 
static int init_intel_leaf4 (PAPI_mh_info_t *mh_info, int *num_levels)
 
static int init_intel_leaf2 (PAPI_mh_info_t *mh_info, int *num_levels)
 
int _x86_detect_hypervisor (char *vendor_name)
 

Variables

static struct _intel_cache_info intel_cache []
 

Macro Definition Documentation

#define TLB_SIZES   3 /* number of different page sizes for a single TLB descriptor */

Definition at line 360 of file x86_cpuid_info.c.

Function Documentation

static short int _amd_L2_L3_assoc ( unsigned short int  pattern)
static

Definition at line 116 of file x86_cpuid_info.c.

/*
 * Translate an encoded AMD L2/L3 associativity field into a way count.
 *
 * pattern: encoded field value; only 0x0..0xF index the lookup table.
 * Returns the table entry for the code. Entries of -1 mark codes that have
 * no value assigned in this table; code 0xF maps to SHRT_MAX, the same value
 * init_intel_leaf4() stores for fully associative caches.
 * Returns -1 when pattern is larger than 0xF.
 */
117 {
118  /* From "CPUID Specification" #25481 Rev 2.28, April 2008 */
119  short int assoc[16] =
120  { 0, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, SHRT_MAX };
/* guard the table lookup: anything above the 16 entries is rejected */
121  if ( pattern > 0xF )
122  return -1;
123  return ( assoc[pattern] );
124 }
unsigned int pattern
Definition: iozone.c:1531

Here is the caller graph for this function:

int _x86_cache_info ( PAPI_mh_info_t *  mh_info)

Definition at line 51 of file x86_cpuid_info.c.

/*
 * Detect the CPU vendor with CPUID function 0 and fill mh_info with the
 * cache/TLB hierarchy using the vendor-specific routine.
 *
 * mh_info: hierarchy to fill; it is reset by init_mem_hierarchy() first and
 *          mh_info->levels receives the level count from init_intel()/init_amd().
 * Returns PAPI_ENOIMPL when the vendor string is neither "GenuineIntel" nor
 * "AuthenticAMD"; otherwise returns retval, which is never changed from 0.
 * The return values of init_intel() and init_amd() are not inspected.
 */
52 {
53  int retval = 0;
54  union
55  {
56  struct
57  {
58  unsigned int ax, bx, cx, dx;
59  } e;
60  char vendor[20]; /* leave room for terminator bytes */
61  } reg;
62 
63  /* Don't use cpu_type to determine the processor.
64  * get the information directly from the chip.
65  */
66  reg.e.ax = 0; /* function code 0: vendor string */
67  /* The vendor string is composed of EBX:EDX:ECX.
68  * by swapping the register addresses in the call below,
69  * the string is correctly composed in the char array.
70  */
71  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.dx, &reg.e.cx );
/* bytes 4..15 of the union hold the 12 vendor characters; terminate after them */
72  reg.vendor[16] = 0;
73  MEMDBG( "Vendor: %s\n", &reg.vendor[4] );
74 
75  init_mem_hierarchy( mh_info );
76 
77  if ( !strncmp( "GenuineIntel", &reg.vendor[4], 12 ) ) {
78  init_intel( mh_info, &mh_info->levels);
79  } else if ( !strncmp( "AuthenticAMD", &reg.vendor[4], 12 ) ) {
80  init_amd( mh_info, &mh_info->levels );
81  } else {
82  MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" );
83  return PAPI_ENOIMPL;
84  }
85 
86  /* This works only because an empty cache element is initialized to 0 */
87  MEMDBG( "Detected L1: %d L2: %d L3: %d\n",
88  mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size,
89  mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size,
90  mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size );
91  return retval;
92 }
int levels
Definition: papi.h:769
#define PAPI_ENOIMPL
Definition: fpapi.h:124
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
static int init_intel(PAPI_mh_info_t *mh_info, int *levels)
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:763
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:770
static void init_mem_hierarchy(PAPI_mh_info_t *mh_info)
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
ssize_t retval
Definition: libasync.c:338
static int init_amd(PAPI_mh_info_t *mh_info, int *levels)

Here is the call graph for this function:

int _x86_detect_hypervisor ( char *  vendor_name)

Definition at line 1514 of file x86_cpuid_info.c.

/*
 * Report whether CPUID advertises a hypervisor and copy its vendor id.
 *
 * vendor_name: output buffer. strncpy() is called with PAPI_MAX_STR_LEN, and
 *              strncpy pads with zero bytes up to that count, so the buffer
 *              must be at least PAPI_MAX_STR_LEN bytes long.
 * Returns 1 and stores the 12-character id read from leaf 0x40000000
 * (EBX, ECX, EDX in that order) when leaf 1 ECX bit 31 is set;
 * otherwise stores "none" and returns 0.
 */
1515 {
1516  unsigned int eax, ebx, ecx, edx;
1517  char hyper_vendor_id[13];
1518 
1519  cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
1520  /* This is the hypervisor bit, ecx bit 31 */
1521  if (ecx&0x80000000) {
1522  /* There are various values in the 0x4000000X range */
1523  /* It is questionable how standard they are */
1524  /* For now we just return the name. */
1525  cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
/* assemble the id from the three registers, 4 bytes each, then terminate it */
1526  memcpy(hyper_vendor_id + 0, &ebx, 4);
1527  memcpy(hyper_vendor_id + 4, &ecx, 4);
1528  memcpy(hyper_vendor_id + 8, &edx, 4);
1529  hyper_vendor_id[12] = '\0';
1530  strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN);
1531  return 1;
1532  }
1533  else {
1534  strncpy(vendor_name,"none",PAPI_MAX_STR_LEN);
1535  }
1536  return 0;
1537 }
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)

Here is the call graph for this function:

Here is the caller graph for this function:

static void cpuid ( unsigned int *  a,
unsigned int *  b,
unsigned int *  c,
unsigned int *  d 
)
inline static

Definition at line 36 of file x86_cpuid_info.c.

/*
 * Execute the CPUID instruction.
 *
 * a: on entry *a is the function code loaded into EAX; on return it holds EAX.
 * b, c, d: receive EBX, ECX and EDX respectively.
 * EBX is saved and restored around the instruction with raw push/pop opcode
 * bytes; its CPUID result is moved to ESI and handed back through the "=S"
 * output. EAX is the only input operand, so ECX is not set before CPUID
 * executes; use cpuid2() when a sub-leaf index in ECX is needed.
 * NOTE(review): the raw push stores below the current stack pointer without
 * the compiler knowing about it; on ABIs with a red zone this could overwrite
 * caller locals once inlined -- confirm against the build flags in use.
 */
37 {
38  unsigned int op = *a;
39  // .byte 0x53 == push ebx. it's universal for 32 and 64 bit
40  // .byte 0x5b == pop ebx.
41  // Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
42  // Using the opcode directly avoids this problem.
43  __asm__ __volatile__( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b":"=a"( *a ), "=S"( *b ), "=c"( *c ),
44  "=d"
45  ( *d )
46  : "a"( op ) );
47 }
double c
Definition: multiplex.c:22
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38
int int op
Definition: iozone.c:19389

Here is the caller graph for this function:

static void cpuid2 ( unsigned int *  eax,
unsigned int *  ebx,
unsigned int *  ecx,
unsigned int *  edx,
unsigned int  index,
unsigned int  ecx_in 
)
inline static

Definition at line 1273 of file x86_cpuid_info.c.

/*
 * Execute CPUID with both a leaf and a sub-leaf selector.
 *
 * index:  loaded into EAX before the instruction (tied to output operand 0).
 * ecx_in: loaded into ECX before the instruction (tied to output operand 2).
 * eax, ebx, ecx, edx: receive the four result registers.
 * Uses the same raw push/pop of EBX and the same "=S" hand-off as cpuid().
 */
1276 {
1277  unsigned int a,b,c,d;
1278  __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b"
1279  : "=a" (a), "=S" (b), "=c" (c), "=d" (d) \
1280  : "0" (index), "2"(ecx_in) );
/* copy the locals out only after the asm statement has completed */
1281  *eax = a; *ebx = b; *ecx = c; *edx = d;
1282 }
double c
Definition: multiplex.c:22
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38

Here is the caller graph for this function:

static int init_amd ( PAPI_mh_info_t *  mh_info,
int *  levels 
)
static

Definition at line 128 of file x86_cpuid_info.c.

/*
 * Fill the memory hierarchy from AMD extended CPUID functions 0x80000005
 * (L1 cache and TLB) and 0x80000006 (L2/L3 cache and L2 TLB).
 *
 * mh_info: hierarchy to fill; entries are written into mh_info->level[0..2].
 * The number of levels that contain at least one non-empty TLB or cache
 * entry is stored through num_levels (the body writes *num_levels).
 * Returns PAPI_OK in all cases.
 *
 * Fix relative to the original listing: the L3 debug message printed
 * L[1].cache[0].num_lines (the L2 value); it now prints the L3 value.
 */
129 {
130  union
131  {
132  struct
133  {
134  unsigned int ax, bx, cx, dx;
135  } e;
136  unsigned char byt[16];
137  } reg;
138  int i, j, levels = 0;
139  PAPI_mh_level_t *L = mh_info->level;
140 
141  /*
142  * Layout of CPU information taken from :
143  * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info.
144  */
145 
146  MEMDBG( "Initializing AMD memory info\n" );
147  /* AMD level 1 cache info */
148  reg.e.ax = 0x80000005; /* extended function code 5: L1 Cache and TLB Identifiers */
149  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
150 
151  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
152  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
153  MEMDBG
154  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
155  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
156  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
157  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
158  reg.byt[15] );
159 
160  /* NOTE: We assume L1 cache and TLB always exists */
161  /* L1 TLB info */
162 
163  /* 4MB memory page information; half the number of entries as 2MB */
164  L[0].tlb[0].type = PAPI_MH_TYPE_INST;
165  L[0].tlb[0].num_entries = reg.byt[0] / 2;
166  L[0].tlb[0].page_size = 4096 << 10;
167  L[0].tlb[0].associativity = reg.byt[1];
168 
169  L[0].tlb[1].type = PAPI_MH_TYPE_DATA;
170  L[0].tlb[1].num_entries = reg.byt[2] / 2;
171  L[0].tlb[1].page_size = 4096 << 10;
172  L[0].tlb[1].associativity = reg.byt[3];
173 
174  /* 2MB memory page information */
175  L[0].tlb[2].type = PAPI_MH_TYPE_INST;
176  L[0].tlb[2].num_entries = reg.byt[0];
177  L[0].tlb[2].page_size = 2048 << 10;
178  L[0].tlb[2].associativity = reg.byt[1];
179 
180  L[0].tlb[3].type = PAPI_MH_TYPE_DATA;
181  L[0].tlb[3].num_entries = reg.byt[2];
182  L[0].tlb[3].page_size = 2048 << 10;
183  L[0].tlb[3].associativity = reg.byt[3];
184 
185  /* 4k page information */
186  L[0].tlb[4].type = PAPI_MH_TYPE_INST;
187  L[0].tlb[4].num_entries = reg.byt[4];
188  L[0].tlb[4].page_size = 4 << 10;
189  L[0].tlb[4].associativity = reg.byt[5];
190 
191  L[0].tlb[5].type = PAPI_MH_TYPE_DATA;
192  L[0].tlb[5].num_entries = reg.byt[6];
193  L[0].tlb[5].page_size = 4 << 10;
194  L[0].tlb[5].associativity = reg.byt[7];
195 
/* an L1 TLB associativity byte of 0xff is replaced by SHRT_MAX */
196  for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) {
197  if ( L[0].tlb[i].associativity == 0xff )
198  L[0].tlb[i].associativity = SHRT_MAX;
199  }
200 
201  /* L1 D-cache info */
202  L[0].cache[0].type =
/* NOTE(review): the listing jumps from 202 to 204; the right-hand side of the assignment above (line 203) is not visible in this extract. */
204  L[0].cache[0].size = reg.byt[11] << 10;
205  L[0].cache[0].associativity = reg.byt[10];
206  L[0].cache[0].line_size = reg.byt[8];
207  /* Byt[9] is "Lines per tag" */
208  /* Is that == lines per cache? */
209  /* L[0].cache[1].num_lines = reg.byt[9]; */
210  if ( L[0].cache[0].line_size )
211  L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size;
212  MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
213  L[0].cache[0].num_lines );
214 
215  /* L1 I-cache info */
216  L[0].cache[1].type = PAPI_MH_TYPE_INST;
217  L[0].cache[1].size = reg.byt[15] << 10;
218  L[0].cache[1].associativity = reg.byt[14];
219  L[0].cache[1].line_size = reg.byt[12];
220  /* Byt[13] is "Lines per tag" */
221  /* Is that == lines per cache? */
222  /* L[0].cache[1].num_lines = reg.byt[13]; */
223  if ( L[0].cache[1].line_size )
224  L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size;
225  MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
226  L[0].cache[1].num_lines );
227 
/* same 0xff -> SHRT_MAX substitution for the two L1 caches */
228  for ( i = 0; i < 2; i++ ) {
229  if ( L[0].cache[i].associativity == 0xff )
230  L[0].cache[i].associativity = SHRT_MAX;
231  }
232 
233  /* AMD L2/L3 Cache and L2 TLB info */
234  /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */
235 
236  reg.e.ax = 0x80000006; /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */
237  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
238 
239  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
240  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
241  MEMDBG
242  ( ":\neax: %#x %#x %#x %#x\nebx: %#x %#x %#x %#x\necx: %#x %#x %#x %#x\nedx: %#x %#x %#x %#x\n",
243  reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
244  reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
245  reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
246  reg.byt[15] );
247 
248  /* L2 TLB info */
249 
/* each L2 TLB field is 12 bits of entry count plus a 4-bit associativity code decoded by _amd_L2_L3_assoc() */
250  if ( reg.byt[0] | reg.byt[1] ) { /* Level 2 ITLB exists */
251  /* 4MB ITLB page information; half the number of entries as 2MB */
252  L[1].tlb[0].type = PAPI_MH_TYPE_INST;
253  L[1].tlb[0].num_entries =
254  ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
255  L[1].tlb[0].page_size = 4096 << 10;
256  L[1].tlb[0].associativity =
257  _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 );
258 
259  /* 2MB ITLB page information */
260  L[1].tlb[2].type = PAPI_MH_TYPE_INST;
261  L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2;
262  L[1].tlb[2].page_size = 2048 << 10;
263  L[1].tlb[2].associativity = L[1].tlb[0].associativity;
264  }
265 
266  if ( reg.byt[2] | reg.byt[3] ) { /* Level 2 DTLB exists */
267  /* 4MB DTLB page information; half the number of entries as 2MB */
268  L[1].tlb[1].type = PAPI_MH_TYPE_DATA;
269  L[1].tlb[1].num_entries =
270  ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
271  L[1].tlb[1].page_size = 4096 << 10;
272  L[1].tlb[1].associativity =
273  _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 );
274 
275  /* 2MB DTLB page information */
276  L[1].tlb[3].type = PAPI_MH_TYPE_DATA;
277  L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2;
278  L[1].tlb[3].page_size = 2048 << 10;
279  L[1].tlb[3].associativity = L[1].tlb[1].associativity;
280  }
281 
282  /* 4k page information */
283  if ( reg.byt[4] | reg.byt[5] ) { /* Level 2 ITLB exists */
284  L[1].tlb[4].type = PAPI_MH_TYPE_INST;
285  L[1].tlb[4].num_entries =
286  ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
287  L[1].tlb[4].page_size = 4 << 10;
288  L[1].tlb[4].associativity =
289  _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 );
290  }
291  if ( reg.byt[6] | reg.byt[7] ) { /* Level 2 DTLB exists */
292  L[1].tlb[5].type = PAPI_MH_TYPE_DATA;
293  L[1].tlb[5].num_entries =
294  ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
295  L[1].tlb[5].page_size = 4 << 10;
296  L[1].tlb[5].associativity =
297  _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 );
298  }
299 
300  /* AMD Level 2 cache info */
301  if ( reg.e.cx ) {
302  L[1].cache[0].type =
/* NOTE(review): line 303 (the right-hand side of the assignment above) is missing from this extract. */
304  L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 ); /* right shift by 16; multiply by 2^10 */
305  L[1].cache[0].associativity =
306  _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 );
307  L[1].cache[0].line_size = reg.byt[8];
308 /* L[1].cache[0].num_lines = reg.byt[9]&0xF; */
309  if ( L[1].cache[0].line_size )
310  L[1].cache[0].num_lines =
311  L[1].cache[0].size / L[1].cache[0].line_size;
312  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
313  L[1].cache[0].num_lines );
314  }
315 
316  /* AMD Level 3 cache info (shared across cores) */
317  if ( reg.e.dx ) {
318  L[2].cache[0].type =
/* NOTE(review): line 319 (the right-hand side of the assignment above) is missing from this extract. */
320  L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1; /* in blocks of 512KB (2^19) */
321  L[2].cache[0].associativity =
322  _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 );
323  L[2].cache[0].line_size = reg.byt[12];
324 /* L[2].cache[0].num_lines = reg.byt[13]&0xF; */
325  if ( L[2].cache[0].line_size )
326  L[2].cache[0].num_lines =
327  L[2].cache[0].size / L[2].cache[0].line_size;
/* report the L3 line count computed just above (was L[1], the L2 entry) */
328  MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
329  L[2].cache[0].num_lines );
330  }
331  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
332  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
333  /* Compute the number of levels of hierarchy actually used */
334  if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY ||
335  L[i].cache[j].type != PAPI_MH_TYPE_EMPTY )
336  levels = i + 1;
337  }
338  }
339  *num_levels = levels;
340  return PAPI_OK;
341 }
#define PAPI_MH_TYPE_INST
Definition: papi.h:725
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
#define PAPI_MH_TYPE_WB
Definition: papi.h:732
int associativity
Definition: papi.h:748
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_MH_TYPE_PSEUDO_LRU
Definition: papi.h:736
#define PAPI_MH_TYPE_DATA
Definition: papi.h:726
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:763
int i
Definition: fileop.c:140
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:770
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:762
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
int
Definition: iozone.c:18528
static short int _amd_L2_L3_assoc(unsigned short int pattern)
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:729
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:724
#define PAPI_MH_TYPE_WT
Definition: papi.h:731
long j
Definition: iozone.c:19135
#define PAPI_MH_MAX_LEVELS
Definition: fpapi.h:87
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
Definition: papi.h:741

Here is the call graph for this function:

Here is the caller graph for this function:

static int init_intel ( PAPI_mh_info_t *  mh_info,
int *  levels 
)
static

Definition at line 1491 of file x86_cpuid_info.c.

/*
 * Fill the memory hierarchy for Intel processors.
 *
 * Tries init_intel_leaf2() first and falls back to init_intel_leaf4() when
 * that does not return PAPI_OK. The level count reported by whichever routine
 * ran last is stored through levels.
 * Always returns PAPI_OK: the result of the leaf4 fallback is assigned to
 * result but not examined afterwards.
 */
1492 {
1493 
1494  int result;
1495  int num_levels;
1496 
1497  /* try using the oldest leaf2 method first */
1498  result=init_intel_leaf2(mh_info, &num_levels);
1499 
1500  if (result!=PAPI_OK) {
1501  /* All Core2 and newer also support leaf4 detection */
1502  /* Starting with Westmere *only* leaf4 is supported */
1503  result=init_intel_leaf4(mh_info, &num_levels);
1504  }
1505 
1506  *levels=num_levels;
1507  return PAPI_OK;
1508 }
static int init_intel_leaf2(PAPI_mh_info_t *mh_info, int *num_levels)
return PAPI_OK
Definition: linux-nvml.c:458
static int init_intel_leaf4(PAPI_mh_info_t *mh_info, int *num_levels)

Here is the call graph for this function:

Here is the caller graph for this function:

static int init_intel_leaf2 ( PAPI_mh_info_t *  mh_info,
int *  num_levels 
)
static

Definition at line 1396 of file x86_cpuid_info.c.

/*
 * Decode Intel cache/TLB descriptors returned by CPUID function 2.
 *
 * Each descriptor byte is looked up in the static intel_cache[] table; the
 * highest table level seen is stored through num_levels.
 * Returns PAPI_ENOSUPP as soon as a 0xff descriptor byte is found; on that
 * path *num_levels is not written. Returns PAPI_OK otherwise, including when
 * the repeat count in the low byte of EAX is 0.
 */
1397 {
1398  /* cpuid() returns memory copies of 4 32-bit registers
1399  * this union allows them to be accessed as either registers
1400  * or individual bytes. Remember that Intel is little-endian.
1401  */
1402  union
1403  {
1404  struct
1405  {
1406  unsigned int ax, bx, cx, dx;
1407  } e;
1408  unsigned char descrip[16];
1409  } reg;
1410 
1411  int r; /* register boundary index */
1412  int b; /* byte index into a register */
1413  int i; /* byte index into the descrip array */
1414  int t; /* table index into the static descriptor table */
1415  int count; /* how many times to call cpuid; from eax:lsb */
1416  int size; /* size of the descriptor table */
1417  int last_level = 0; /* how many levels in the cache hierarchy */
1418 
1419  int need_leaf4=0;
1420 
1421  /* All of Intel's cache info is in 1 call to cpuid
1422  * however it is a table lookup :(
1423  */
1424  MEMDBG( "Initializing Intel Cache and TLB descriptors\n" );
1425 
1426 #ifdef DEBUG
1427  if ( ISLEVEL( DEBUG_MEMORY ) )
/* NOTE(review): line 1428 is missing from this extract; presumably the call to print_intel_cache_table() -- confirm against the source file. */
1429 #endif
1430 
1431  reg.e.ax = 0x2; /* function code 2: cache descriptors */
1432  cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
1433 
1434  MEMDBG( "e.ax=%#8.8x e.bx=%#8.8x e.cx=%#8.8x e.dx=%#8.8x\n",
1435  reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
1436  MEMDBG
1437  ( ":\nd0: %#x %#x %#x %#x\nd1: %#x %#x %#x %#x\nd2: %#x %#x %#x %#x\nd3: %#x %#x %#x %#x\n",
1438  reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
1439  reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
1440  reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
1441  reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
1442 
1443  count = reg.descrip[0]; /* # times to repeat CPUID call. Not implemented. */
1444 
1445  /* Knights Corner at least returns 0 here */
1446  if (count==0) goto early_exit;
1447 
1448  size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) ); /* # descriptors */
1449  MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
1450  count );
1451  if (count!=1) {
1452  fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count);
1453  }
1454 
1455  for ( r = 0; r < 4; r++ ) { /* walk the registers */
1456  if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */
1457  for ( b = 3; b >= 0; b-- ) { /* walk the descriptor bytes from high to low */
1458  i = r * 4 + b; /* calculate an index into the array of descriptors */
1459  if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */
1460  if ( reg.descrip[i] == 0xff ) {
1461  MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
/* the flag set here is never observed: the function returns on the next line, so the need_leaf4 test after early_exit always sees 0 */
1462  need_leaf4=1;
1463  return PAPI_ENOSUPP;
1464  /* we might continue instead */
1465  /* in order to get TLB info */
1466  /* continue; */
1467  }
1468  for ( t = 0; t < size; t++ ) { /* walk the descriptor table */
1469  if ( reg.descrip[i] == intel_cache[t].descriptor ) { /* find match */
1470  if ( intel_cache[t].level > last_level )
1471  last_level = intel_cache[t].level;
/* NOTE(review): line 1472 is missing from this extract; presumably the start of the intel_decode_descriptor() call whose last argument follows -- confirm. */
1473  mh_info->level );
1474  }
1475  }
1476  }
1477  }
1478  }
1479  }
1480 early_exit:
1481  MEMDBG( "# of Levels: %d\n", last_level );
1482  *num_levels=last_level;
1483  if (need_leaf4) {
1484  return PAPI_ENOSUPP;
1485  }
1486  return PAPI_OK;
1487 }
static void intel_decode_descriptor(struct _intel_cache_info *d, PAPI_mh_level_t *L)
static void print_intel_cache_table()
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
static struct _intel_cache_info intel_cache[]
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
#define DEBUG_MEMORY
Definition: papi_debug.h:34
t
Definition: iozone.c:23562
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
char *long long size
Definition: iozone.c:12023
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:770
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38
#define ISLEVEL(a)
Definition: papi_debug.h:54

Here is the call graph for this function:

Here is the caller graph for this function:

static int init_intel_leaf4 ( PAPI_mh_info_t *  mh_info,
int *  num_levels 
)
static

Definition at line 1287 of file x86_cpuid_info.c.

/*
 * Enumerate caches with CPUID function 4 (one sub-leaf per cache) and record
 * them in mh_info.
 *
 * mh_info:    hierarchy to fill; each cache is stored in the first empty
 *             cache slot of mh_info->level[cache_level-1].
 * num_levels: set to 0 on entry, then raised to the highest cache level seen.
 * Returns PAPI_ENOSUPP when the maximum basic CPUID function is below 4,
 * PAPI_OK otherwise. Enumeration stops at the first sub-leaf whose cache
 * type field is 0.
 *
 * Fixes relative to the original listing:
 *  - the cache level field is EAX bits 7:5 (three bits); it was masked with
 *    0x3, so a level of 4 decoded as 0 and indexed level[-1].
 *  - sub-leaves whose level falls outside 1..PAPI_MAX_MEM_HIERARCHY_LEVELS
 *    are now skipped instead of indexing outside mh_info->level[].
 */
1288 {
1289 
1290  unsigned int eax, ebx, ecx, edx;
1291  unsigned int maxidx, ecx_in;
1292  int next;
1293 
1294  int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
1295  int cache_linesize,cache_partitions,cache_ways,cache_sets;
1296 
/* NOTE(review): line 1297 is missing from this extract; presumably the declaration of the cache-info pointer c used below -- confirm. */
1298 
1299  *num_levels=0;
1300 
1301  cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
1302  maxidx = eax;
1303 
1304  if (maxidx<4) {
1305  MEMDBG("Warning! CPUID Index 4 not supported!\n");
1306  return PAPI_ENOSUPP;
1307  }
1308 
1309  ecx_in=0;
1310  while(1) {
1311  cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
1312 
1313 
1314 
1315  /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */
1316 
1317  cache_type=eax&0x1f;
1318  if (cache_type==0) break;
1319 
1320  cache_level=(eax>>5)&0x7;
 /* level is 1-based and used as level[cache_level-1]; skip anything the level[] array cannot hold */
 if (cache_level<1 || cache_level>PAPI_MAX_MEM_HIERARCHY_LEVELS) {
 MEMDBG("Skipping cache at unsupported level %d\n",cache_level);
 ecx_in++;
 continue;
 }
1321  cache_selfinit=(eax>>8)&0x1;
1322  cache_fullyassoc=(eax>>9)&0x1;
1323 
1324  cache_linesize=(ebx&0xfff)+1;
1325  cache_partitions=((ebx>>12)&0x3ff)+1;
1326  cache_ways=((ebx>>22)&0x3ff)+1;
1327 
1328  cache_sets=(ecx)+1;
1329 
1330  /* should we export this info?
1331 
1332  cache_maxshare=((eax>>14)&0xfff)+1;
1333  cache_maxpackage=((eax>>26)&0x3f)+1;
1334 
1335  cache_wb=(edx)&1;
1336  cache_inclusive=(edx>>1)&1;
1337  cache_indexing=(edx>>2)&1;
1338  */
1339 
1340  if (cache_level>*num_levels) *num_levels=cache_level;
1341 
1342  /* find next slot available to hold cache info */
1343  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1344  if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break;
1345  }
1346 
 /* when no slot is empty the loop ends with next at the last index, so that slot is reused */
1347  c=&(mh_info->level[cache_level-1].cache[next]);
1348 
1349  switch(cache_type) {
1350  case 1: MEMDBG("L%d Data Cache\n",cache_level);
/* NOTE(review): lines 1351, 1354 and 1357 are missing from this extract; presumably each case assigns c->type -- confirm. */
1352  break;
1353  case 2: MEMDBG("L%d Instruction Cache\n",cache_level);
1355  break;
1356  case 3: MEMDBG("L%d Unified Cache\n",cache_level);
1358  break;
1359  }
1360 
1361  if (cache_selfinit) { MEMDBG("\tSelf-init\n"); }
1362  if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); }
1363 
1364  //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare);
1365  //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage);
1366 
1367  MEMDBG("\tCache linesize: %d\n",cache_linesize);
1368 
1369  MEMDBG("\tCache partitions: %d\n",cache_partitions);
1370  MEMDBG("\tCache associaticity: %d\n",cache_ways);
1371 
1372  MEMDBG("\tCache sets: %d\n",cache_sets);
1373  MEMDBG("\tCache size = %dkB\n",
1374  (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
1375 
1376  //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb);
1377  //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive);
1378  //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing);
1379 
1380  c->line_size=cache_linesize;
1381  if (cache_fullyassoc) {
1382  c->associativity=SHRT_MAX;
1383  }
1384  else {
1385  c->associativity=cache_ways;
1386  }
1387  c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
1388  c->num_lines=cache_ways*cache_partitions*cache_sets;
1389 
1390  ecx_in++;
1391  }
1392  return PAPI_OK;
1393 }
#define PAPI_MH_TYPE_INST
Definition: papi.h:725
static void cpuid2(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx, unsigned int index, unsigned int ecx_in)
return PAPI_OK
Definition: linux-nvml.c:458
double c
Definition: multiplex.c:22
#define PAPI_MH_TYPE_DATA
Definition: papi.h:726
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:763
#define PAPI_ENOSUPP
Definition: fpapi.h:123
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:770
#define MEMDBG(format, args...)
Definition: papi_debug.h:70
nsize_list next
Definition: iozone.c:20053
#define PAPI_MH_TYPE_UNIFIED
Definition: papi.h:729
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:724
#define PAPI_MH_MAX_LEVELS
Definition: fpapi.h:87

Here is the call graph for this function:

Here is the caller graph for this function:

static void init_mem_hierarchy ( PAPI_mh_info_t *  mh_info)
static

Definition at line 95 of file x86_cpuid_info.c.

/*
 * Reset every TLB and cache slot of every level in mh_info->level[].
 *
 * For each TLB slot the type is set to PAPI_MH_TYPE_EMPTY and num_entries and
 * associativity to 0; for each cache slot size, line_size, num_lines and
 * associativity are set to 0. No assignment to a TLB page_size field is
 * visible in this listing.
 */
96 {
97  int i, j;
98  PAPI_mh_level_t *L = mh_info->level;
99 
100  /* initialize entire memory hierarchy structure to benign values */
101  for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
102  for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
103  L[i].tlb[j].type = PAPI_MH_TYPE_EMPTY;
104  L[i].tlb[j].num_entries = 0;
105  L[i].tlb[j].associativity = 0;
/* NOTE(review): line 106 is missing from this extract; presumably it resets L[i].cache[j].type to PAPI_MH_TYPE_EMPTY -- confirm. */
107  L[i].cache[j].size = 0;
108  L[i].cache[j].line_size = 0;
109  L[i].cache[j].num_lines = 0;
110  L[i].cache[j].associativity = 0;
111  }
112  }
113 }
int associativity
Definition: papi.h:748
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:763
int i
Definition: fileop.c:140
PAPI_mh_level_t level[PAPI_MAX_MEM_HIERARCHY_LEVELS]
Definition: papi.h:770
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:762
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:724
long j
Definition: iozone.c:19135
#define PAPI_MH_MAX_LEVELS
Definition: fpapi.h:87
#define PAPI_MAX_MEM_HIERARCHY_LEVELS
Definition: papi.h:741

Here is the caller graph for this function:

static void intel_decode_descriptor ( struct _intel_cache_info *  d,
PAPI_mh_level_t *  L 
)
static

Definition at line 1211 of file x86_cpuid_info.c.

/*
 * Record one intel_cache[] table entry in the per-level hierarchy array.
 *
 * d: descriptor table entry to decode.
 * L: array of hierarchy levels; the entry is written to L[d->level - 1]
 *    unless the 0x49 special case below overrides the index.
 * TLB descriptors (d->type has PAPI_MH_TYPE_TLB set) are expanded into one
 * TLB slot per non-zero page size in d->size[]; all other descriptors fill a
 * single cache slot. Slots are taken from the first empty position; when none
 * is empty the search stops at index PAPI_MH_MAX_LEVELS - 1 and that slot is
 * used.
 */
1212 {
1213  int i, next;
1214  int level = d->level - 1;
/* NOTE(review): lines 1215-1216 are missing from this extract; presumably the declarations of the pointers t and c used below -- confirm. */
1217 
1218  if ( d->descriptor == 0x49 ) { /* special case */
1219  unsigned int r_eax, r_ebx, r_ecx, r_edx;
1220  r_eax = 0x1; /* function code 1: family & model */
1221  cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
1222  /* override table for Family F, model 6 only */
/* NOTE(review): level is a zero-based index here (d->level - 1), so 3 selects the fourth level; if "L3" is intended the index would be 2 -- confirm. */
1223  if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
1224  level = 3;
1225  }
1226  if ( d->type & PAPI_MH_TYPE_TLB ) {
1227  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1228  if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY )
1229  break;
1230  }
1231  /* expand TLB entries for multiple possible page sizes */
1232  for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i];
1233  i++, next++ ) {
1234 // printf("Level %d Descriptor: %#x TLB type %#x next: %d, i: %d\n", level, d->descriptor, d->type, next, i);
1235  t = &L[level].tlb[next];
1236  t->type = PAPI_MH_CACHE_TYPE( d->type );
1237  t->num_entries = d->entries;
1238  t->page_size = d->size[i] << 10; /* minimum page size in KB */
1239  t->associativity = d->associativity;
1240  /* another special case */
1241  if ( d->descriptor == 0xB1 && d->size[i] == 4096 )
1242  t->num_entries = d->entries / 2;
1243  }
1244  } else {
1245  for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
1246  if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY )
1247  break;
1248  }
1249 // printf("Level %d Descriptor: %#x Cache type %#x next: %d\n", level, d->descriptor, d->type, next);
1250  c = &L[level].cache[next];
1251  c->type = PAPI_MH_CACHE_TYPE( d->type );
1252  c->size = d->size[0] << 10; /* convert from KB to bytes */
1253  c->associativity = d->associativity;
/* line_size and num_lines are only written when the table entry has a non-zero line size */
1254  if ( d->line_size ) {
1255  c->line_size = d->line_size;
1256  c->num_lines = c->size / c->line_size;
1257  }
1258  }
1259 }
static void cpuid(unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
int size[TLB_SIZES]
int associativity
Definition: papi.h:748
double c
Definition: multiplex.c:22
#define PAPI_MH_TYPE_TLB
Definition: papi.h:738
t
Definition: iozone.c:23562
PAPI_mh_cache_info_t cache[PAPI_MH_MAX_LEVELS]
Definition: papi.h:763
int i
Definition: fileop.c:140
#define TLB_SIZES
PAPI_mh_tlb_info_t tlb[PAPI_MH_MAX_LEVELS]
Definition: papi.h:762
nsize_list next
Definition: iozone.c:20053
#define PAPI_MH_TYPE_EMPTY
Definition: papi.h:724
#define PAPI_MH_MAX_LEVELS
Definition: fpapi.h:87
#define PAPI_MH_CACHE_TYPE(a)
Definition: papi.h:730

Here is the call graph for this function:

Here is the caller graph for this function:

static void print_intel_cache_table ( )
static

Definition at line 1184 of file x86_cpuid_info.c.

/*
 * Print every entry of the static intel_cache[] descriptor table with
 * printf(): descriptor, level, type, the TLB_SIZES size values,
 * associativity, sector, line size and entry count.
 * Takes no arguments and returns nothing.
 */
1185 {
/* k is the number of table entries, computed from the array size */
1186  int i, j, k =
1187  ( int ) ( sizeof ( intel_cache ) /
1188  sizeof ( struct _intel_cache_info ) );
1189  for ( i = 0; i < k; i++ ) {
1190  printf( "%d.\tDescriptor: %#x\n", i, intel_cache[i].descriptor );
1191  printf( "\t Level: %d\n", intel_cache[i].level );
1192  printf( "\t Type: %d\n", intel_cache[i].type );
1193  printf( "\t Size(s): " );
1194  for ( j = 0; j < TLB_SIZES; j++ )
1195  printf( "%d, ", intel_cache[i].size[j] );
1196  printf( "\n" );
1197  printf( "\t Assoc: %d\n", intel_cache[i].associativity );
1198  printf( "\t Sector: %d\n", intel_cache[i].sector );
1199  printf( "\t Line Size: %d\n", intel_cache[i].line_size );
1200  printf( "\t Entries: %d\n", intel_cache[i].entries );
1201  printf( "\n" );
1202  }
1203 }
static struct _intel_cache_info intel_cache[]
#define printf
Definition: papi_test.h:125
int i
Definition: fileop.c:140
char *long long size
Definition: iozone.c:12023
#define TLB_SIZES
int k
Definition: iozone.c:19136
int
Definition: iozone.c:18528
long j
Definition: iozone.c:19135

Here is the caller graph for this function:

Variable Documentation

struct _intel_cache_info intel_cache[]
static

Definition at line 373 of file x86_cpuid_info.c.