#include #include #include #ifdef HAS_ISA_2_06 #include #include #include #define NUM_ARGS 14 #define MAX_RESULTS NUM_ARGS #define START_i 0 #define STOP_i 14 #define START_j 0 #define STOP_j 14 #define START_k 0 #define STOP_k 14 union convert_t { unsigned int u; float f; } convert; #define PRINT_FLOAT 0 #define PRINT_HEX 1 void print_vector_elements(vector unsigned long src) { unsigned int element0, element1, element2, element3; element0 = (unsigned int)((src[0] >> 32) & 0xFFFFFFFF); element1 = (unsigned int)(src[0] & 0xFFFFFFFF); element2 = (unsigned int)((src[1] >> 32) & 0xFFFFFFFF); element3 = (unsigned int)(src[1] & 0xFFFFFFFF); convert.u = element0; #if PRINT_FLOAT printf(" %8.4e,", convert.f); #endif #if PRINT_HEX printf(" 0x%08x,", convert.u); #endif convert.u = element1; #if PRINT_FLOAT printf(" %8.4e,", convert.f); #endif #if PRINT_HEX printf(" 0x%08x,", convert.u); #endif convert.u = element2; #if PRINT_FLOAT printf(" %8.4e,", convert.f); #endif #if PRINT_HEX printf(" 0x%08x,", convert.u); #endif convert.u = element3; #if PRINT_FLOAT printf(" %8.4e", convert.f); #endif #if PRINT_HEX printf(" 0x%08x", convert.u); #endif printf("\n"); } void do_tests( void ) { vector unsigned long srcA, srcB, srcC, arg_list[NUM_ARGS], dst; int i, j, k; arg_list[0][0] = 0x8000012480000124ULL; arg_list[0][1] = 0x8000012480000124ULL; arg_list[1][0] = 0x8000012480000124ULL; arg_list[1][1] = 0x8000012480000124ULL; arg_list[2][0] = 0x000000060000000CULL; arg_list[2][1] = 0x000000080000000AULL; arg_list[3][0] = 0xFFFFFAFAFFFFFBFBULL; arg_list[3][1] = 0xFFFFFEFCFFFFFDFDULL; arg_list[4][0] = 0x0000024800000258ULL; arg_list[4][1] = 0x0000026800000278ULL; arg_list[5][0] = 0x0000000200000002ULL; arg_list[5][1] = 0x0000000400000008ULL; arg_list[6][0] = 0xC3000000C3020000ULL; arg_list[6][1] = 0xC2FE0000C2FD0000ULL; arg_list[7][0] = 0xc3000000c3020000ULL; arg_list[7][1] = 0xc2fe0000c2fd0000ULL; arg_list[8][0] = 0xC2FE0000C2FD0000ULL; arg_list[8][1] = 0x0000000400000002ULL; arg_list[9][0] = 0xC2FE0000C2FD0000ULL; arg_list[9][1] = 0x7E9676997E700022ULL; arg_list[10][0] = 0x80B0000180B00002ULL; arg_list[10][1] = 0x80B0000480B00006ULL; arg_list[11][0] = 0x80B0000080B00001ULL; arg_list[11][1] = 0x80B0000280B00004ULL; arg_list[12][0] = 0x4E8000004E800000ULL; arg_list[12][1] = 0x4E8000004E800000ULL; arg_list[13][0] = 0xB0000000B0000000ULL; arg_list[13][1] = 0xB0000000B0000000ULL; for ( i = START_i; i < STOP_i; i++) { dst[0] = 0xFFFFFFFFFFFFFFFF; dst[1] = 0xFFFFFFFFFFFFFFFF; srcA[0] = arg_list[i][0]; srcA[1] = arg_list[i][1]; printf ("srcA = 0x%016lx 0x%016lx\n\n", srcA[1], srcA[0]); __asm__ __volatile__ ("vcfsx %0,%1,31" : "=v" (dst): "v" (srcA)); printf (" vcfsx(srcA) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vcfux %0,%1,31" : "=v" (dst): "v" (srcA)); printf (" vcfux(srcA) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vrfim %0,%1" : "=v" (dst): "v" (srcA)); printf (" vrfim(srcA) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vrfin %0,%1" : "=v" (dst): "v" (srcA)); printf (" vrfin(srcA) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vrfiz %0,%1" : "=v" (dst): "v" (srcA)); printf (" vrfiz(srcA) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vexptefp %0,%1" : "=v" (dst): "v" (srcA)); printf (" vexptefp(srcA) result = "); print_vector_elements(dst); /* Smallest representable floating point input does not generate a subnormal result. */ __asm__ __volatile__ ("vlogefp %0,%1" : "=v" (dst): "v" (srcA)); printf (" vlogefp(srcA) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vrefp %0,%1" : "=v" (dst): "v" (srcA)); printf (" vrefp(srcA) result = "); print_vector_elements(dst); /* Square root of the smallest representable number is a normal number. Square root can't generate a subnormal result. */ __asm__ __volatile__ ("vrsqrtefp %0,%1" : "=v" (dst): "v" (srcA)); printf (" vrsqrtefp(srcA) result = 0x%016lx 0x%016lx\n\n", dst[1], dst[0]); for ( j = START_j; j < STOP_j; j++) { srcB[0] = arg_list[j][0]; srcB[1] = arg_list[j][1]; dst[0] = 0xFFFFFFFFFFFFFFFF; dst[1] = 0xFFFFFFFFFFFFFFFF; printf ("srcB = 0x%016lx 0x%016lx\n\n", srcB[1], srcB[0]); __asm__ __volatile__ ("vaddfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vaddfp(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vsubfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vsubfp(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vmaxfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vmax(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vminfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vmin(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vcmpbfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vcmpbfp(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vcmpeqfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vcmpeqfp(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vcmpgefp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vcmpgefp(srcA,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vcmpgtfp %0,%1,%2" : "=v" (dst): "v" (srcA), "v" (srcB)); printf (" vcmpgtfp(srcA,srcB) result = "); print_vector_elements(dst); for ( k = START_k; k < STOP_k; k++) { srcC[0] = arg_list[k][0]; srcC[1] = arg_list[k][1]; dst[0] = 0xFFFFFFFFFFFFFFFF; dst[1] = 0xFFFFFFFFFFFFFFFF; printf ("srcC = 0x%016lx 0x%016lx\n\n", srcC[1], srcC[0]); __asm__ __volatile__ ("vmaddfp %0,%1,%2,%3" : "=v" (dst): "v" (srcA), "v" (srcC), "v" (srcB)); printf("i=%d, j=%d, k=%d ", i, j, k); printf (" vmaddfp(srcA,srcC,srcB) result = "); print_vector_elements(dst); __asm__ __volatile__ ("vnmsubfp %0,%1,%2,%3" : "=v" (dst): "v" (srcA), "v" (srcC), "v" (srcB)); printf("i=%d, j=%d, k=%d ", i, j, k); printf (" vnmsubfp(srcA,srcC,srcB) result = "); print_vector_elements(dst); } } } return; } #endif int main() { #ifdef HAS_ISA_2_06 register vector unsigned long vreg, initial_vscr_value; /* The VSCR[NJ] bit controlls how subnormal (denormal) results are handled * by the various vector float instructions. * * If VSCR[NJ] = 0 then the subnormal result is returned. * If VSCR[NJ] = 1 then the subnormal result is set to zero the sign is * not changed. * * This tests verifies the VSCR[NJ] functionality for the vector float * instructions. */ /* Save the VSCR setting and restore it at the end. Don't want to screw * up other tests with a different setting of the VSCR. */ __asm__ __volatile__ ("mfvscr %0" : "=v"(initial_vscr_value)); // set VSCR[nj] to 0 and run test printf("Attempt to set VSR[NJ] to 0, run test\n"); vreg[0] = 0; vreg[1] = 0; __asm__ __volatile__ ("mtvscr %0" :: "v"(vreg)); // Read VSCR __asm__ __volatile__ ("mfvscr %0" : "=v"(vreg)); #ifdef VGP_ppc64le_linux printf("vscr set to 0x%lx\n", vreg[0]); #else printf("vscr set to 0x%lx\n", vreg[1]); #endif do_tests(); // set VSCR[nj] to 1 and run test printf("Attempt to set VSR[NJ] to 1, run test\n"); /* The upper 95 bits are set to zero when writting to the register. So, I can get away with setting the NJ bit for BE and LE and the HW will take care of clearing the one I don't want. */ vreg[0] = 1 << (127 - 111); // Sets bit for LE case vreg[1] = 1 << (127 - 111); // Sets bit for BE case __asm__ __volatile__ ("mtvscr %0" :: "v"(vreg)); vreg[0] = 0; vreg[1] = 0; // Read VSCR __asm__ __volatile__ ("mfvscr %0" : "=v"(vreg)); #ifdef VGP_ppc64le_linux printf("vscr set to 0x%lx\n", vreg[0]); #else printf("vscr set to 0x%lx\n", vreg[1]); #endif do_tests(); /* Restore the VSCR settings. */ __asm__ __volatile__ ("mtvscr %0" :: "v"(initial_vscr_value)); #else printf("No ISA 2.06 support\n"); #endif return 0; }