#include #include #define N 64 struct float_test { float x[N], y[N], z[N], expected[N], res[N]; } ft __attribute__((aligned (32))); struct double_test { double x[N], y[N], z[N], expected[N], res[N]; } dt __attribute__((aligned (32))); float plus_zero, plus_infty, minus_infty, nan_value; static int testf( float x, float y ) { unsigned int a, b; memcpy( &a, &x, sizeof (a) ); memcpy( &b, &y, sizeof (b) ); if ((a & 0x7fc00000U) == 0x7fc00000U) return (b & 0x7fc00000U) != 0x7fc00000U; return memcmp( &a, &b, sizeof (a) ) != 0; } static int test_fmaf( void ) { int res = 0, i, j; float w; for (i = 0; i < N; i++) { int thisres = 0; __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); if (thisres) printf( "Failure 1 %d %a %a\n", i, w, ft.expected[i] ); res |= thisres; thisres = 0; __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); if (thisres) printf( "Failure 2 %d %a %a\n", i, w, ft.expected[i] ); res |= thisres; } for (i = 0; i < N; i++) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i++) { int thisres = 0; __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); __asm __volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( w, ft.expected[i] ); if (thisres) printf( "Failure 3 %d %a %a\n", i, w, ft.expected[i] ); res |= thisres; thisres = 0; __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w) : "0" (ft.x[i]), "m" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "x" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w) : "0" (ft.x[i]), "x" (ft.y[i]), "m" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "x" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); __asm __volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w) : "x" (ft.x[i]), "m" (ft.y[i]), "0" (ft.z[i])); thisres |= testf( -w, ft.expected[i] ); if (thisres) printf( "Failure 4 %d %a %a\n", i, w, ft.expected[i] ); res |= thisres; } for (i = 0; i < N; i++) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" "vfmadd132ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" "vfmadd132ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" "vfmadd213ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" "vfmadd213ps (%3), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" "vfmadd231ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" "vfmadd231ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 5 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" "vfnmsub132ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" "vfnmsub132ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" "vfnmsub213ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" "vfnmsub213ps (%3), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" "vfnmsub231ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" "vfnmsub231ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 6 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" "vfmsub132ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" "vfmsub132ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" "vfmsub213ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" "vfmsub213ps (%3), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" "vfmsub231ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" "vfmsub231ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 7 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" "vfnmadd132ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" "vfnmadd132ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" "vfnmadd213ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" "vfnmadd213ps (%3), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" "vfnmadd231ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" "vfnmadd231ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 8 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" "vfmaddsub132ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" "vfmaddsub132ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" "vfmaddsub213ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" "vfmaddsub213ps (%3), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" "vfmaddsub231ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" "vfmaddsub231ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 9 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;" "vfmsubadd132ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;" "vfmsubadd132ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;" "vfmsubadd213ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;" "vfmsubadd213ps (%3), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;" "vfmsubadd231ps %%xmm7, %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;" "vfmsubadd231ps (%2), %%xmm8, %%xmm9;" "vmovaps %%xmm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 10 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 8) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" "vfmadd132ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" "vfmadd132ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" "vfmadd213ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" "vfmadd213ps (%3), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" "vfmadd231ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" "vfmadd231ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 11 %d", i ); for (j = 0; j < 8; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" "vfnmsub132ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" "vfnmsub132ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" "vfnmsub213ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" "vfnmsub213ps (%3), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" "vfnmsub231ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" "vfnmsub231ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 12 %d", i ); for (j = 0; j < 8; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 8) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" "vfmsub132ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" "vfmsub132ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" "vfmsub213ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" "vfmsub213ps (%3), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" "vfmsub231ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" "vfmsub231ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 13 %d", i ); for (j = 0; j < 8; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" "vfnmadd132ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" "vfnmadd132ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" "vfnmadd213ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" "vfnmadd213ps (%3), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" "vfnmadd231ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" "vfnmadd231ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( -ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 14 %d", i ); for (j = 0; j < 8; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 8) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" "vfmaddsub132ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" "vfmaddsub132ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" "vfmaddsub213ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" "vfmaddsub213ps (%3), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" "vfmaddsub231ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" "vfmaddsub231ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 15 %d", i ); for (j = 0; j < 8; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) ft.z[i] = -ft.z[i]; for (i = 0; i < N; i += 8) { int thisres = 0; __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;" "vfmsubadd132ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;" "vfmsubadd132ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;" "vfmsubadd213ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;" "vfmsubadd213ps (%3), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;" "vfmsubadd231ps %%ymm7, %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); __asm __volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;" "vfmsubadd231ps (%2), %%ymm8, %%ymm9;" "vmovaps %%ymm9, (%0)" : : "r" (&ft.res[i]), "r" (&ft.x[i]), "r" (&ft.y[i]), "r" (&ft.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 8; j++) thisres |= testf( ft.res[i+j], ft.expected[i+j] ); if (thisres) { printf( "Failure 16 %d", i ); for (j = 0; j < 8; j++) printf( " %a %a", ft.res[i+j], ft.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) ft.z[i] = -ft.z[i]; return res; } static int test( double x, double y ) { unsigned long long a, b; memcpy( &a, &x, sizeof (a) ); memcpy( &b, &y, sizeof (b) ); if ((a & 0x7ff8000000000000ULL) == 0x7ff8000000000000ULL) return (b & 0x7ff8000000000000ULL) != 0x7ff8000000000000ULL; return memcmp( &a, &b, sizeof (a) ) != 0; } static int test_fma( void ) { int res = 0, i, j; double w; for (i = 0; i < N; i++) { int thisres = 0; __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); thisres |= test( w, dt.expected[i] ); if (thisres) printf( "Failure 1 %d %a %a\n", i, w, dt.expected[i] ); res |= thisres; thisres = 0; __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); if (thisres) printf( "Failure 2 %d %a %a\n", i, w, dt.expected[i] ); res |= thisres; } for (i = 0; i < N; i++) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i++) { int thisres = 0; __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); thisres |= test( w, dt.expected[i] ); __asm __volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); thisres |= test( w, dt.expected[i] ); if (thisres) printf( "Failure 3 %d %a %a\n", i, w, dt.expected[i] ); res |= thisres; thisres = 0; __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w) : "0" (dt.x[i]), "m" (dt.y[i]), "x" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "x" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w) : "0" (dt.x[i]), "x" (dt.y[i]), "m" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "x" (dt.y[i]), "0" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); __asm __volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w) : "x" (dt.x[i]), "m" (dt.y[i]), "0" (dt.z[i])); thisres |= test( -w, dt.expected[i] ); if (thisres) printf( "Failure 4 %d %a %a\n", i, w, dt.expected[i] ); res |= thisres; } for (i = 0; i < N; i++) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 2) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" "vfmadd132pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" "vfmadd132pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" "vfmadd213pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" "vfmadd213pd (%3), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" "vfmadd231pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" "vfmadd231pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 5 %d", i ); for (j = 0; j < 2; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" "vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" "vfnmsub132pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" "vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" "vfnmsub213pd (%3), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" "vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" "vfnmsub231pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 6 %d", i ); for (j = 0; j < 2; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 2) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" "vfmsub132pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" "vfmsub132pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" "vfmsub213pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" "vfmsub213pd (%3), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" "vfmsub231pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" "vfmsub231pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 7 %d", i ); for (j = 0; j < 2; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" "vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" "vfnmadd132pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" "vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" "vfnmadd213pd (%3), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" "vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" "vfnmadd231pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 8 %d", i ); for (j = 0; j < 2; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 2) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" "vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" "vfmaddsub132pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" "vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" "vfmaddsub213pd (%3), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" "vfmaddsub231pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" "vfmaddsub231pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 9 %d", i ); for (j = 0; j < 2; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 2) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;" "vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;" "vfmsubadd132pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;" "vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;" "vfmsubadd213pd (%3), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;" "vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;" "vfmsubadd231pd (%2), %%xmm8, %%xmm9;" "vmovapd %%xmm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 2; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 10 %d", i ); for (j = 0; j < 2; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" "vfmadd132pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" "vfmadd132pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" "vfmadd213pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" "vfmadd213pd (%3), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" "vfmadd231pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" "vfmadd231pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 11 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" "vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" "vfnmsub132pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" "vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" "vfnmsub213pd (%3), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" "vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" "vfnmsub231pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 12 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" "vfmsub132pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" "vfmsub132pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" "vfmsub213pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" "vfmsub213pd (%3), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" "vfmsub231pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" "vfmsub231pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 13 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; thisres = 0; __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" "vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" "vfnmadd132pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" "vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" "vfnmadd213pd (%3), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" "vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" "vfnmadd231pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( -dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 14 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" "vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" "vfmaddsub132pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" "vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" "vfmaddsub213pd (%3), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" "vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" "vfmaddsub231pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 15 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 0; i < N; i++) dt.z[i] = -dt.z[i]; for (i = 0; i < N; i += 4) { int thisres = 0; __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;" "vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;" "vfmsubadd132pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;" "vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;" "vfmsubadd213pd (%3), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;" "vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); __asm __volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;" "vfmsubadd231pd (%2), %%ymm8, %%ymm9;" "vmovapd %%ymm9, (%0)" : : "r" (&dt.res[i]), "r" (&dt.x[i]), "r" (&dt.y[i]), "r" (&dt.z[i]) : "xmm7", "xmm8", "xmm9"); for (j = 0; j < 4; j++) thisres |= test( dt.res[i+j], dt.expected[i+j] ); if (thisres) { printf( "Failure 16 %d", i ); for (j = 0; j < 4; j++) printf( " %a %a", dt.res[i+j], dt.expected[i+j] ); printf( "\n" ); } res |= thisres; } for (i = 1; i < N; i += 2) dt.z[i] = -dt.z[i]; return res; } int main( ) { int res = 0; int i = 0; plus_zero = 0.0; __asm __volatile__ ("" : : "r" (&plus_zero) : "memory"); nan_value = plus_zero / plus_zero; plus_infty = 3.40282346638528859812e+38F * 16.0F; minus_infty = -plus_infty; #define TEST_F( a, b, c, d ) \ do { \ ft.x[i] = a; \ ft.y[i] = b; \ ft.z[i] = c; \ ft.expected[i] = d; \ i++; \ } while (0) TEST_F( 1.0, 2.0, 3.0, 5.0 ); TEST_F( nan_value, 2.0, 3.0, nan_value ); TEST_F( 1.0, nan_value, 3.0, nan_value ); TEST_F( 1.0, 2.0, nan_value, nan_value ); TEST_F( plus_infty, 0.0, nan_value, nan_value ); TEST_F( minus_infty, 0.0, nan_value, nan_value ); TEST_F( 0.0, plus_infty, nan_value, nan_value ); TEST_F( 0.0, minus_infty, nan_value, nan_value ); TEST_F( plus_infty, 0.0, 1.0, nan_value ); TEST_F( minus_infty, 0.0, 1.0, nan_value ); TEST_F( 0.0, plus_infty, 1.0, nan_value ); TEST_F( 0.0, minus_infty, 1.0, nan_value ); TEST_F( plus_infty, plus_infty, minus_infty, nan_value ); TEST_F( minus_infty, plus_infty, plus_infty, nan_value ); TEST_F( plus_infty, minus_infty, plus_infty, nan_value ); TEST_F( minus_infty, minus_infty, minus_infty, nan_value ); TEST_F( plus_infty, 3.5L, minus_infty, nan_value ); TEST_F( minus_infty, -7.5L, minus_infty, nan_value ); TEST_F( -13.5L, plus_infty, plus_infty, nan_value ); TEST_F( minus_infty, 7.5L, plus_infty, nan_value ); TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L ); TEST_F( -3.40282346638528859812e+38F, -3.40282346638528859812e+38F, minus_infty, minus_infty ); TEST_F( 3.40282346638528859812e+38F / 2, 3.40282346638528859812e+38F / 2, minus_infty, minus_infty ); TEST_F( -3.40282346638528859812e+38F, 3.40282346638528859812e+38F, plus_infty, plus_infty ); TEST_F( 3.40282346638528859812e+38F / 2, -3.40282346638528859812e+38F / 4, plus_infty, plus_infty ); TEST_F( plus_infty, 4, plus_infty, plus_infty ); TEST_F( 2, minus_infty, minus_infty, minus_infty ); TEST_F( minus_infty, minus_infty, plus_infty, plus_infty ); TEST_F( plus_infty, minus_infty, minus_infty, minus_infty ); TEST_F( 0x1.7ff8p+13, 0x1.000002p+0, 0x1.ffffp-24, 0x1.7ff802p+13 ); TEST_F( 0x1.fffp+0, 0x1.00001p+0, -0x1.fffp+0, 0x1.fffp-20 ); TEST_F( 0x1.9abcdep+127, 0x0.9abcdep-126, -0x1.f08948p+0, 0x1.bb421p-25 ); TEST_F( 0x1.9abcdep+100, 0x0.9abcdep-126, -0x1.f08948p-27, 0x1.bb421p-52 ); TEST_F( 0x1.fffffep+127, 0x1.001p+0, -0x1.fffffep+127, 0x1.fffffep+115 ); TEST_F( -0x1.fffffep+127, 0x1.fffffep+0, 0x1.fffffep+127, -0x1.fffffap+127 ); TEST_F( 0x1.fffffep+127, 2.0, -0x1.fffffep+127, 0x1.fffffep+127 ); res |= test_fmaf( ); i = 0; #define TEST( a, b, c, d ) \ do { \ dt.x[i] = a; \ dt.y[i] = b; \ dt.z[i] = c; \ dt.expected[i] = d; \ i++; \ } while (0) TEST( 1.0, 2.0, 3.0, 5.0 ); TEST( nan_value, 2.0, 3.0, nan_value ); TEST( 1.0, nan_value, 3.0, nan_value ); TEST( 1.0, 2.0, nan_value, nan_value ); TEST( plus_infty, 0.0, nan_value, nan_value ); TEST( minus_infty, 0.0, nan_value, nan_value ); TEST( 0.0, plus_infty, nan_value, nan_value ); TEST( 0.0, minus_infty, nan_value, nan_value ); TEST( plus_infty, 0.0, 1.0, nan_value ); TEST( minus_infty, 0.0, 1.0, nan_value ); TEST( 0.0, plus_infty, 1.0, nan_value ); TEST( 0.0, minus_infty, 1.0, nan_value ); TEST( plus_infty, plus_infty, minus_infty, nan_value ); TEST( minus_infty, plus_infty, plus_infty, nan_value ); TEST( plus_infty, minus_infty, plus_infty, nan_value ); TEST( minus_infty, minus_infty, minus_infty, nan_value ); TEST( plus_infty, 3.5L, minus_infty, nan_value ); TEST( minus_infty, -7.5L, minus_infty, nan_value ); TEST( -13.5L, plus_infty, plus_infty, nan_value ); TEST( minus_infty, 7.5L, plus_infty, nan_value ); TEST( 1.25L, 0.75L, 0.0625L, 1.0L ); TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty, minus_infty ); TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty, minus_infty ); TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty, plus_infty ); TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty, plus_infty ); TEST( plus_infty, 4, plus_infty, plus_infty ); TEST( 2, minus_infty, minus_infty, minus_infty ); TEST( minus_infty, minus_infty, plus_infty, plus_infty ); TEST( plus_infty, minus_infty, minus_infty, minus_infty ); TEST( 0x1.7fp+13, 0x1.0000000000001p+0, 0x1.ffep-48, 0x1.7f00000000001p+13 ); TEST( 0x1.fffp+0, 0x1.0000000000001p+0, -0x1.fffp+0, 0x1.fffp-52 ); TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, 0x1p-300, 1.0 ); TEST( 0x1.0000002p+0, 0x1.ffffffcp-1, -0x1p-300, 0x1.fffffffffffffp-1 ); TEST( 0x1.deadbeef2feedp+1023, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp+1, 0x1.0989687bc9da4p-53 ); TEST( 0x1.deadbeef2feedp+900, 0x0.deadbeef2feedp-1022, -0x1.a05f8c01a4bfbp-122, 0x1.0989687bc9da4p-176 ); TEST( 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011 ); TEST( -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023 ); TEST( 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023 ); TEST( 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0 ); TEST( 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022 ); TEST( 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022 ); TEST( 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022 ); TEST( 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022 ); TEST( 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022 ); TEST( 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022 ); TEST( -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d59p-1022 ); TEST( -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5dp-1022 ); TEST( 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe9p-1022 ); TEST( -0x1.fffffffffffffp-711, 0x1.fffffffffffffp-275, 0x1.fffffe00007ffp-983, 0x1.7ffffe00007ffp-983 ); res |= test_fma( ); if (res == 0) printf( "Testing successful\n"); return 0; }