PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vector.c File Reference
Include dependency graph for vector.c:

Go to the source code of this file.

Macros

#define NUMBER   100
 

Functions

void inline_packed_sse_add (float *aa, float *bb, float *cc)
 
void inline_packed_sse_mul (float *aa, float *bb, float *cc)
 
void inline_packed_sse2_add (double *aa, double *bb, double *cc)
 
void inline_packed_sse2_mul (double *aa, double *bb, double *cc)
 
void inline_unpacked_sse_add (float *aa, float *bb, float *cc)
 
void inline_unpacked_sse_mul (float *aa, float *bb, float *cc)
 
void inline_unpacked_sse2_add (double *aa, double *bb, double *cc)
 
void inline_unpacked_sse2_mul (double *aa, double *bb, double *cc)
 
int main (int argc, char **argv)
 

Macro Definition Documentation

#define NUMBER   100

Definition at line 4 of file vector.c.

Function Documentation

void inline_packed_sse2_add ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 27 of file vector.c.

28 {
29  __asm__ __volatile__( "movapd (%0), %%xmm0;"
30  "movapd (%1), %%xmm1;"
31  "addpd %%xmm0, %%xmm1;"
32  "movapd %%xmm1, (%2);"::"r"( aa ),
33  "r"( bb ), "r"( cc )
34  :"%xmm0", "%xmm1" );
35 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_packed_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 37 of file vector.c.

38 {
39  __asm__ __volatile__( "movapd (%0), %%xmm0;"
40  "movapd (%1), %%xmm1;"
41  "mulpd %%xmm0, %%xmm1;"
42  "movapd %%xmm1, (%2);"::"r"( aa ),
43  "r"( bb ), "r"( cc )
44  :"%xmm0", "%xmm1" );
45 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_packed_sse_add ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 7 of file vector.c.

8 {
9  __asm__ __volatile__( "movaps (%0), %%xmm0;"
10  "movaps (%1), %%xmm1;"
11  "addps %%xmm0, %%xmm1;"
12  "movaps %%xmm1, (%2);"::"r"( aa ),
13  "r"( bb ), "r"( cc )
14  :"%xmm0", "%xmm1" );
15 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_packed_sse_mul ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 17 of file vector.c.

18 {
19  __asm__ __volatile__( "movaps (%0), %%xmm0;"
20  "movaps (%1), %%xmm1;"
21  "mulps %%xmm0, %%xmm1;"
22  "movaps %%xmm1, (%2);"::"r"( aa ),
23  "r"( bb ), "r"( cc )
24  :"%xmm0", "%xmm1" );
25 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_unpacked_sse2_add ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 65 of file vector.c.

66 {
67  __asm__ __volatile__( "movsd (%0), %%xmm0;"
68  "movsd (%1), %%xmm1;"
69  "addsd %%xmm0, %%xmm1;"
70  "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
71  :"%xmm0", "%xmm1" );
72 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_unpacked_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
)
inline

Definition at line 74 of file vector.c.

75 {
76  __asm__ __volatile__( "movsd (%0), %%xmm0;"
77  "movsd (%1), %%xmm1;"
78  "mulsd %%xmm0, %%xmm1;"
79  "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
80  :"%xmm0", "%xmm1" );
81 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_unpacked_sse_add ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 47 of file vector.c.

48 {
49  __asm__ __volatile__( "movss (%0), %%xmm0;"
50  "movss (%1), %%xmm1;"
51  "addss %%xmm0, %%xmm1;"
52  "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
53  :"%xmm0", "%xmm1" );
54 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

void inline_unpacked_sse_mul ( float *  aa,
float *  bb,
float *  cc 
)
inline

Definition at line 56 of file vector.c.

57 {
58  __asm__ __volatile__( "movss (%0), %%xmm0;"
59  "movss (%1), %%xmm1;"
60  "mulss %%xmm0, %%xmm1;"
61  "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
62  :"%xmm0", "%xmm1" );
63 }
double aa[N]
Definition: byte_profile.c:30
struct client_command cc
Definition: iozone.c:21326
double bb[N]
Definition: byte_profile.c:30

Here is the caller graph for this function:

int main ( int  argc,
char **  argv 
)

Definition at line 84 of file vector.c.

85 {
86  int i, packed = 0, sse = 0;
87  float a[4] = { 1.0, 2.0, 3.0, 4.0 };
88  float b[4] = { 2.0, 3.0, 4.0, 5.0 };
89  float c[4] = { 0.0, 0.0, 0.0, 0.0 };
90  double d[4] = { 1.0, 2.0, 3.0, 4.0 };
91  double e[4] = { 2.0, 3.0, 4.0, 5.0 };
92  double f[4] = { 0.0, 0.0, 0.0, 0.0 };
93 
94  if ( argc != 3 ) {
95  bail:
96  printf( "Usage %s: <packed|unpacked> <sse|sse2>\n", argv[0] );
97  exit( 1 );
98  }
99  if ( strcasecmp( argv[1], "packed" ) == 0 )
100  packed = 1;
101  else if ( strcasecmp( argv[1], "unpacked" ) == 0 )
102  packed = 0;
103  else
104  goto bail;
105  if ( strcasecmp( argv[2], "sse" ) == 0 )
106  sse = 1;
107  else if ( strcasecmp( argv[2], "sse2" ) == 0 )
108  sse = 0;
109  else
110  goto bail;
111 
112 #if 0
113  if ( ( sse ) &&
114  ( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) {
115  printf( "This processor does not have SSE.\n" );
116  exit( 1 );
117  }
118  if ( ( sse == 0 ) &&
119  ( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) {
120  printf( "This processor does not have SSE2.\n" );
121  exit( 1 );
122  }
123 #endif
124 
125  printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] );
126  printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] );
127 
128  if ( ( packed == 0 ) && ( sse == 1 ) ) {
129  for ( i = 0; i < NUMBER; i++ ) {
130  inline_unpacked_sse_add( &a[0], &b[0], &c[0] );
131  }
132  printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] );
133 
134  for ( i = 0; i < NUMBER; i++ ) {
135  inline_unpacked_sse_mul( &a[0], &b[0], &c[0] );
136  }
137  printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] );
138  }
139  if ( ( packed == 1 ) && ( sse == 1 ) ) {
140  for ( i = 0; i < NUMBER; i++ ) {
141  inline_packed_sse_add( a, b, c );
142  }
143  printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1],
144  c[2], c[3] );
145  for ( i = 0; i < NUMBER; i++ ) {
146  inline_packed_sse_mul( a, b, c );
147  }
148  printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1],
149  c[2], c[3] );
150  }
151 
152  if ( ( packed == 0 ) && ( sse == 0 ) ) {
153  for ( i = 0; i < NUMBER; i++ ) {
154  inline_unpacked_sse2_add( &d[0], &e[0], &f[0] );
155  }
156  printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, c[0] );
157 
158  for ( i = 0; i < NUMBER; i++ ) {
159  inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] );
160  }
161  printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, c[0] );
162  }
163  if ( ( packed == 1 ) && ( sse == 0 ) ) {
164  for ( i = 0; i < NUMBER; i++ ) {
165  inline_packed_sse2_add( &d[0], &e[0], &f[0] );
166  }
167  printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, c[0] );
168 
169  for ( i = 0; i < NUMBER; i++ ) {
170  inline_packed_sse2_mul( &d[0], &e[0], &f[0] );
171  }
172  printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, c[0] );
173  }
174 
175 
176  exit( 0 );
177 }
void inline_unpacked_sse2_mul(double *aa, double *bb, double *cc)
Definition: vector.c:74
double f(double a)
Definition: cpi.c:23
void inline_unpacked_sse_add(float *aa, float *bb, float *cc)
Definition: vector.c:47
void inline_packed_sse_add(float *aa, float *bb, float *cc)
Definition: vector.c:7
void inline_unpacked_sse2_add(double *aa, double *bb, double *cc)
Definition: vector.c:65
#define printf
Definition: papi_test.h:125
double c
Definition: multiplex.c:22
int int argc
Definition: iozone.c:1609
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
char ** argv
Definition: iozone.c:1610
int i
Definition: fileop.c:140
void inline_packed_sse_mul(float *aa, float *bb, float *cc)
Definition: vector.c:17
#define NUMBER
Definition: vector.c:4
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38
void inline_packed_sse2_add(double *aa, double *bb, double *cc)
Definition: vector.c:27
void inline_packed_sse2_mul(double *aa, double *bb, double *cc)
Definition: vector.c:37
int system()
void exit()
void inline_unpacked_sse_mul(float *aa, float *bb, float *cc)
Definition: vector.c:56

Here is the call graph for this function: