PAPI  5.3.2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vector.c
Go to the documentation of this file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
4 #define NUMBER 100
5 
/*
 * Packed single-precision add: cc[i] = aa[i] + bb[i] for i = 0..3,
 * executed with the SSE instructions movaps/addps.
 *
 * aa, bb: inputs, four floats each; cc: output, four floats.
 * movaps requires all three addresses to be 16-byte aligned.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_packed_sse_add( float *aa, float *bb, float *cc )
{
    __asm__ __volatile__( "movaps (%0), %%xmm0;"
                          "movaps (%1), %%xmm1;"
                          "addps %%xmm0, %%xmm1;"
                          "movaps %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Packed single-precision multiply: cc[i] = aa[i] * bb[i] for i = 0..3,
 * executed with the SSE instructions movaps/mulps.
 *
 * aa, bb: inputs, four floats each; cc: output, four floats.
 * movaps requires all three addresses to be 16-byte aligned.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_packed_sse_mul( float *aa, float *bb, float *cc )
{
    __asm__ __volatile__( "movaps (%0), %%xmm0;"
                          "movaps (%1), %%xmm1;"
                          "mulps %%xmm0, %%xmm1;"
                          "movaps %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Packed double-precision add: cc[i] = aa[i] + bb[i] for i = 0..1,
 * executed with the SSE2 instructions movapd/addpd.
 *
 * aa, bb: inputs, two doubles each are read; cc: output, two doubles
 * are written. movapd requires all three addresses to be 16-byte aligned.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_packed_sse2_add( double *aa, double *bb, double *cc )
{
    __asm__ __volatile__( "movapd (%0), %%xmm0;"
                          "movapd (%1), %%xmm1;"
                          "addpd %%xmm0, %%xmm1;"
                          "movapd %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Packed double-precision multiply: cc[i] = aa[i] * bb[i] for i = 0..1,
 * executed with the SSE2 instructions movapd/mulpd.
 *
 * aa, bb: inputs, two doubles each are read; cc: output, two doubles
 * are written. movapd requires all three addresses to be 16-byte aligned.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_packed_sse2_mul( double *aa, double *bb, double *cc )
{
    __asm__ __volatile__( "movapd (%0), %%xmm0;"
                          "movapd (%1), %%xmm1;"
                          "mulpd %%xmm0, %%xmm1;"
                          "movapd %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Scalar single-precision add: *cc = *aa + *bb, executed with the SSE
 * instructions movss/addss. Only the first float behind each pointer is
 * accessed.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_unpacked_sse_add( float *aa, float *bb, float *cc )
{
    __asm__ __volatile__( "movss (%0), %%xmm0;"
                          "movss (%1), %%xmm1;"
                          "addss %%xmm0, %%xmm1;"
                          "movss %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Scalar single-precision multiply: *cc = *aa * *bb, executed with the SSE
 * instructions movss/mulss. Only the first float behind each pointer is
 * accessed.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_unpacked_sse_mul( float *aa, float *bb, float *cc )
{
    __asm__ __volatile__( "movss (%0), %%xmm0;"
                          "movss (%1), %%xmm1;"
                          "mulss %%xmm0, %%xmm1;"
                          "movss %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Scalar double-precision add: *cc = *aa + *bb, executed with the SSE2
 * instructions movsd/addsd. Only the first double behind each pointer is
 * accessed.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_unpacked_sse2_add( double *aa, double *bb, double *cc )
{
    __asm__ __volatile__( "movsd (%0), %%xmm0;"
                          "movsd (%1), %%xmm1;"
                          "addsd %%xmm0, %%xmm1;"
                          "movsd %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
/*
 * Scalar double-precision multiply: *cc = *aa * *bb, executed with the SSE2
 * instructions movsd/mulsd. Only the first double behind each pointer is
 * accessed.
 *
 * static: a plain C99 `inline` definition provides no external symbol, so
 * a call that is not inlined would fail to link.
 */
static inline void
inline_unpacked_sse2_mul( double *aa, double *bb, double *cc )
{
    __asm__ __volatile__( "movsd (%0), %%xmm0;"
                          "movsd (%1), %%xmm1;"
                          "mulsd %%xmm0, %%xmm1;"
                          "movsd %%xmm1, (%2);"
                          : /* no register outputs */
                          : "r"( aa ), "r"( bb ), "r"( cc )
                          /* "memory": the asm reads *aa, *bb and writes *cc
                           * behind the compiler's back */
                          : "xmm0", "xmm1", "memory" );
}
82 
83 int
84 main( int argc, char **argv )
85 {
86  int i, packed = 0, sse = 0;
87  float a[4] = { 1.0, 2.0, 3.0, 4.0 };
88  float b[4] = { 2.0, 3.0, 4.0, 5.0 };
89  float c[4] = { 0.0, 0.0, 0.0, 0.0 };
90  double d[4] = { 1.0, 2.0, 3.0, 4.0 };
91  double e[4] = { 2.0, 3.0, 4.0, 5.0 };
92  double f[4] = { 0.0, 0.0, 0.0, 0.0 };
93 
94  if ( argc != 3 ) {
95  bail:
96  printf( "Usage %s: <packed|unpacked> <sse|sse2>\n", argv[0] );
97  exit( 1 );
98  }
99  if ( strcasecmp( argv[1], "packed" ) == 0 )
100  packed = 1;
101  else if ( strcasecmp( argv[1], "unpacked" ) == 0 )
102  packed = 0;
103  else
104  goto bail;
105  if ( strcasecmp( argv[2], "sse" ) == 0 )
106  sse = 1;
107  else if ( strcasecmp( argv[2], "sse2" ) == 0 )
108  sse = 0;
109  else
110  goto bail;
111 
112 #if 0
113  if ( ( sse ) &&
114  ( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) {
115  printf( "This processor does not have SSE.\n" );
116  exit( 1 );
117  }
118  if ( ( sse == 0 ) &&
119  ( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) {
120  printf( "This processor does not have SSE2.\n" );
121  exit( 1 );
122  }
123 #endif
124 
125  printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] );
126  printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] );
127 
128  if ( ( packed == 0 ) && ( sse == 1 ) ) {
129  for ( i = 0; i < NUMBER; i++ ) {
130  inline_unpacked_sse_add( &a[0], &b[0], &c[0] );
131  }
132  printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] );
133 
134  for ( i = 0; i < NUMBER; i++ ) {
135  inline_unpacked_sse_mul( &a[0], &b[0], &c[0] );
136  }
137  printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] );
138  }
139  if ( ( packed == 1 ) && ( sse == 1 ) ) {
140  for ( i = 0; i < NUMBER; i++ ) {
141  inline_packed_sse_add( a, b, c );
142  }
143  printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1],
144  c[2], c[3] );
145  for ( i = 0; i < NUMBER; i++ ) {
146  inline_packed_sse_mul( a, b, c );
147  }
148  printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1],
149  c[2], c[3] );
150  }
151 
152  if ( ( packed == 0 ) && ( sse == 0 ) ) {
153  for ( i = 0; i < NUMBER; i++ ) {
154  inline_unpacked_sse2_add( &d[0], &e[0], &f[0] );
155  }
156  printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, c[0] );
157 
158  for ( i = 0; i < NUMBER; i++ ) {
159  inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] );
160  }
161  printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, c[0] );
162  }
163  if ( ( packed == 1 ) && ( sse == 0 ) ) {
164  for ( i = 0; i < NUMBER; i++ ) {
165  inline_packed_sse2_add( &d[0], &e[0], &f[0] );
166  }
167  printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, c[0] );
168 
169  for ( i = 0; i < NUMBER; i++ ) {
170  inline_packed_sse2_mul( &d[0], &e[0], &f[0] );
171  }
172  printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, c[0] );
173  }
174 
175 
176  exit( 0 );
177 }
void inline_unpacked_sse2_mul(double *aa, double *bb, double *cc)
Definition: vector.c:74
double f(double a)
Definition: cpi.c:23
void inline_unpacked_sse_add(float *aa, float *bb, float *cc)
Definition: vector.c:47
void inline_packed_sse_add(float *aa, float *bb, float *cc)
Definition: vector.c:7
void inline_unpacked_sse2_add(double *aa, double *bb, double *cc)
Definition: vector.c:65
double aa[N]
Definition: byte_profile.c:30
#define printf
Definition: papi_test.h:125
double c
Definition: multiplex.c:22
int int argc
Definition: iozone.c:1609
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:37
char ** argv
Definition: iozone.c:1610
struct client_command cc
Definition: iozone.c:21326
int i
Definition: fileop.c:140
void inline_packed_sse_mul(float *aa, float *bb, float *cc)
Definition: vector.c:17
double bb[N]
Definition: byte_profile.c:30
#define NUMBER
Definition: vector.c:4
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: rapl_basic.c:38
void inline_packed_sse2_add(double *aa, double *bb, double *cc)
Definition: vector.c:27
void inline_packed_sse2_mul(double *aa, double *bb, double *cc)
Definition: vector.c:37
int system()
int main(int argc, char **argv)
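Parse the packed/unpacked and sse/sse2 arguments, run NUMBER add and multiply operations with the selected SSE instructions, and print the results.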
void exit()
void inline_unpacked_sse_mul(float *aa, float *bb, float *cc)
Definition: vector.c:56