// Include SIMD header.
// NOTE(review): the header names were missing from the original listing;
// <fvec.h> and <xmmintrin.h> are reconstructed from the surviving comment and
// from the identifiers used below (F32vec4, __m128, _mm_mul_ps) -- confirm.
#ifdef __INTEL_COMPILER
#  include <fvec.h>      // also contains xmmintrin.h
#else
#  include <xmmintrin.h>
#endif

// Four packed floats, accessible either as one SSE register (m) or as
// individual lanes (f[0]..f[3]).
typedef union {
    __m128 m;
    float  f[4];
} vector;

/*
 * Compute the scalar (dot) product of two 4-element float vectors using
 * SSE intrinsics.
 *
 * x, y:    each must point to at least four readable floats; only the first
 *          four elements of each array are used. No particular alignment
 *          is required.
 * returns: x[0]*y[0] + x[1]*y[1] + x[2]*y[2] + x[3]*y[3], with the four
 *          products added left to right.
 */
float scalarproductIntrinsics(float x[], float y[])
{
    vector tmp;

    // Use unaligned loads instead of dereferencing (__m128 *)x: a plain
    // float array is not guaranteed to be 16-byte aligned.
    tmp.m = _mm_mul_ps(_mm_loadu_ps(x), _mm_loadu_ps(y));

    // Horizontal sum of the four lane products.
    return tmp.f[0] + tmp.f[1] + tmp.f[2] + tmp.f[3];
}

// F32vec4 is declared in <fvec.h>, which is only included for the Intel
// compiler above, so this variant is compiled only in that configuration.
#ifdef __INTEL_COMPILER
/*
 * Compute the scalar (dot) product of two 4-element float vectors using
 * the F32vec4 class.
 *
 * x, y:    each must point to at least four readable floats; only the first
 *          four elements of each array are used. No particular alignment
 *          is required.
 * returns: x[0]*y[0] + x[1]*y[1] + x[2]*y[2] + x[3]*y[3], with the four
 *          products added left to right.
 */
float scalarproductSSE(float x[], float y[])
{
    // Build the vectors from unaligned loads rather than reinterpreting the
    // float pointers as F32vec4 pointers.
    F32vec4 vecX(_mm_loadu_ps(x));
    F32vec4 vecY(_mm_loadu_ps(y));
    F32vec4 tmp = vecX * vecY;   // lane-wise multiplication

    return tmp[0] + tmp[1] + tmp[2] + tmp[3];
}
#endif