vector.c File Reference

Include dependency graph for vector.c:

Go to the source code of this file.

Defines

#define NUMBER   100

Functions

void inline_packed_sse_add (float *aa, float *bb, float *cc)
void inline_packed_sse_mul (float *aa, float *bb, float *cc)
void inline_packed_sse2_add (double *aa, double *bb, double *cc)
void inline_packed_sse2_mul (double *aa, double *bb, double *cc)
void inline_unpacked_sse_add (float *aa, float *bb, float *cc)
void inline_unpacked_sse_mul (float *aa, float *bb, float *cc)
void inline_unpacked_sse2_add (double *aa, double *bb, double *cc)
void inline_unpacked_sse2_mul (double *aa, double *bb, double *cc)
int main (int argc, char **argv)

Define Documentation

#define NUMBER   100

Definition at line 4 of file vector.c.


Function Documentation

void inline_packed_sse2_add ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 27 of file vector.c.

00028 {   /* cc[0..1] = aa[0..1] + bb[0..1] (two packed doubles); movapd needs 16-byte-aligned pointers */
00029     __asm__ __volatile__( "movapd (%0), %%xmm0;"    /* xmm0 = aa[0..1] */
00030                           "movapd (%1), %%xmm1;"    /* xmm1 = bb[0..1] */
00031                           "addpd %%xmm0, %%xmm1;"   /* xmm1 += xmm0 (AT&T syntax: destination last) */
00032                           "movapd %%xmm1, (%2);"::"r"( aa ),   /* store xmm1 to cc; no output operands */
00033                           "r"( bb ), "r"( cc )
00034                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00035 }

Here is the caller graph for this function:

void inline_packed_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 37 of file vector.c.

00038 {   /* cc[0..1] = aa[0..1] * bb[0..1] (two packed doubles); movapd needs 16-byte-aligned pointers */
00039     __asm__ __volatile__( "movapd (%0), %%xmm0;"    /* xmm0 = aa[0..1] */
00040                           "movapd (%1), %%xmm1;"    /* xmm1 = bb[0..1] */
00041                           "mulpd %%xmm0, %%xmm1;"   /* xmm1 *= xmm0 (AT&T syntax: destination last) */
00042                           "movapd %%xmm1, (%2);"::"r"( aa ),   /* store xmm1 to cc; no output operands */
00043                           "r"( bb ), "r"( cc )
00044                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00045 }

Here is the caller graph for this function:

void inline_packed_sse_add ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 7 of file vector.c.

00008 {   /* cc[0..3] = aa[0..3] + bb[0..3] (four packed floats); movaps needs 16-byte-aligned pointers */
00009     __asm__ __volatile__( "movaps (%0), %%xmm0;"    /* xmm0 = aa[0..3] */
00010                           "movaps (%1), %%xmm1;"    /* xmm1 = bb[0..3] */
00011                           "addps %%xmm0, %%xmm1;"   /* xmm1 += xmm0 (AT&T syntax: destination last) */
00012                           "movaps %%xmm1, (%2);"::"r"( aa ),   /* store xmm1 to cc; no output operands */
00013                           "r"( bb ), "r"( cc )
00014                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00015 }

Here is the caller graph for this function:

void inline_packed_sse_mul ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 17 of file vector.c.

00018 {   /* cc[0..3] = aa[0..3] * bb[0..3] (four packed floats); movaps needs 16-byte-aligned pointers */
00019     __asm__ __volatile__( "movaps (%0), %%xmm0;"    /* xmm0 = aa[0..3] */
00020                           "movaps (%1), %%xmm1;"    /* xmm1 = bb[0..3] */
00021                           "mulps %%xmm0, %%xmm1;"   /* xmm1 *= xmm0 (AT&T syntax: destination last) */
00022                           "movaps %%xmm1, (%2);"::"r"( aa ),   /* store xmm1 to cc; no output operands */
00023                           "r"( bb ), "r"( cc )
00024                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00025 }

Here is the caller graph for this function:

void inline_unpacked_sse2_add ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 65 of file vector.c.

00066 {   /* cc[0] = aa[0] + bb[0]: scalar double add, only the first element of each array is touched */
00067     __asm__ __volatile__( "movsd (%0), %%xmm0;"    /* xmm0 = aa[0] */
00068                           "movsd (%1), %%xmm1;"    /* xmm1 = bb[0] */
00069                           "addsd %%xmm0, %%xmm1;"  /* xmm1 += xmm0 (AT&T syntax: destination last) */
00070                           "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )   /* store to cc[0]; no output operands */
00071                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00072 }

Here is the caller graph for this function:

void inline_unpacked_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 74 of file vector.c.

00075 {   /* cc[0] = aa[0] * bb[0]: scalar double multiply, only the first element of each array is touched */
00076     __asm__ __volatile__( "movsd (%0), %%xmm0;"    /* xmm0 = aa[0] */
00077                           "movsd (%1), %%xmm1;"    /* xmm1 = bb[0] */
00078                           "mulsd %%xmm0, %%xmm1;"  /* xmm1 *= xmm0 (AT&T syntax: destination last) */
00079                           "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )   /* store to cc[0]; no output operands */
00080                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00081 }

Here is the caller graph for this function:

void inline_unpacked_sse_add ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 47 of file vector.c.

00048 {   /* cc[0] = aa[0] + bb[0]: scalar float add, only the first element of each array is touched */
00049     __asm__ __volatile__( "movss (%0), %%xmm0;"    /* xmm0 = aa[0] */
00050                           "movss (%1), %%xmm1;"    /* xmm1 = bb[0] */
00051                           "addss %%xmm0, %%xmm1;"  /* xmm1 += xmm0 (AT&T syntax: destination last) */
00052                           "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )   /* store to cc[0]; no output operands */
00053                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00054 }

Here is the caller graph for this function:

void inline_unpacked_sse_mul ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 56 of file vector.c.

00057 {   /* cc[0] = aa[0] * bb[0]: scalar float multiply, only the first element of each array is touched */
00058     __asm__ __volatile__( "movss (%0), %%xmm0;"    /* xmm0 = aa[0] */
00059                           "movss (%1), %%xmm1;"    /* xmm1 = bb[0] */
00060                           "mulss %%xmm0, %%xmm1;"  /* xmm1 *= xmm0 (AT&T syntax: destination last) */
00061                           "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )   /* store to cc[0]; no output operands */
00062                           :"%xmm0", "%xmm1", "memory" );   /* "memory": *aa, *bb are read and *cc written without being operands */
00063 }

Here is the caller graph for this function:

int main ( int  argc,
char **  argv 
)

Definition at line 84 of file vector.c.

00085 {   /* Parse <packed|unpacked> <sse|sse2>, run the selected add and mul kernels NUMBER times each, print the results; exits 1 on bad usage, 0 otherwise. */
00086     int i, packed = 0, sse = 0;
00087     float a[4] __attribute__(( aligned( 16 ) )) = { 1.0, 2.0, 3.0, 4.0 };   /* 16-byte aligned: the packed kernels use movaps/movapd */
00088     float b[4] __attribute__(( aligned( 16 ) )) = { 2.0, 3.0, 4.0, 5.0 };
00089     float c[4] __attribute__(( aligned( 16 ) )) = { 0.0, 0.0, 0.0, 0.0 };   /* SSE (float) results */
00090     double d[4] __attribute__(( aligned( 16 ) )) = { 1.0, 2.0, 3.0, 4.0 };
00091     double e[4] __attribute__(( aligned( 16 ) )) = { 2.0, 3.0, 4.0, 5.0 };
00092     double f[4] __attribute__(( aligned( 16 ) )) = { 0.0, 0.0, 0.0, 0.0 };  /* SSE2 (double) results */
00093
00094     if ( argc != 3 ) {
00095       bail:   /* also reached via goto when either argument is not recognised */
00096         printf( "Usage %s: <packed|unpacked> <sse|sse2>\n", argv[0] );
00097         exit( 1 );
00098     }
00099     if ( strcasecmp( argv[1], "packed" ) == 0 )
00100         packed = 1;
00101     else if ( strcasecmp( argv[1], "unpacked" ) == 0 )
00102         packed = 0;
00103     else
00104         goto bail;
00105     if ( strcasecmp( argv[2], "sse" ) == 0 )
00106         sse = 1;
00107     else if ( strcasecmp( argv[2], "sse2" ) == 0 )
00108         sse = 0;   /* sse == 0 selects the SSE2 (double) kernels */
00109     else
00110         goto bail;
00111
00112 #if 0
00113     if ( ( sse ) &&
00114          ( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) {
00115         printf( "This processor does not have SSE.\n" );
00116         exit( 1 );
00117     }
00118     if ( ( sse == 0 ) &&
00119          ( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) {
00120         printf( "This processor does not have SSE2.\n" );
00121         exit( 1 );
00122     }
00123 #endif
00124
00125     printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] );   /* d[] and e[] hold the same values as a[] and b[] */
00126     printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] );
00127
00128     if ( ( packed == 0 ) && ( sse == 1 ) ) {
00129         for ( i = 0; i < NUMBER; i++ ) {
00130             inline_unpacked_sse_add( &a[0], &b[0], &c[0] );
00131         }
00132         printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] );
00133
00134         for ( i = 0; i < NUMBER; i++ ) {
00135             inline_unpacked_sse_mul( &a[0], &b[0], &c[0] );
00136         }
00137         printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] );
00138     }
00139     if ( ( packed == 1 ) && ( sse == 1 ) ) {
00140         for ( i = 0; i < NUMBER; i++ ) {
00141             inline_packed_sse_add( a, b, c );
00142         }
00143         printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1],
00144                 c[2], c[3] );
00145         for ( i = 0; i < NUMBER; i++ ) {
00146             inline_packed_sse_mul( a, b, c );
00147         }
00148         printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1],
00149                 c[2], c[3] );
00150     }
00151
00152     if ( ( packed == 0 ) && ( sse == 0 ) ) {
00153         for ( i = 0; i < NUMBER; i++ ) {
00154             inline_unpacked_sse2_add( &d[0], &e[0], &f[0] );
00155         }
00156         printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, f[0] );   /* f, not c: the SSE2 kernels write the double array */
00157
00158         for ( i = 0; i < NUMBER; i++ ) {
00159             inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] );
00160         }
00161         printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, f[0] );   /* f, not c */
00162     }
00163     if ( ( packed == 1 ) && ( sse == 0 ) ) {
00164         for ( i = 0; i < NUMBER; i++ ) {
00165             inline_packed_sse2_add( &d[0], &e[0], &f[0] );
00166         }
00167         printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, f[0] );   /* f, not c; only the first of the two packed lanes is shown */
00168
00169         for ( i = 0; i < NUMBER; i++ ) {
00170             inline_packed_sse2_mul( &d[0], &e[0], &f[0] );
00171         }
00172         printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, f[0] );   /* f, not c */
00173     }
00174
00175
00176     exit( 0 );
00177 }

Here is the call graph for this function:


Generated on 17 Nov 2016 for PAPI by  doxygen 1.6.1