/* Vectorized routines for x86 Advanced Vector Extensions (AVX) * * Most speed-critical code is in the .h file, to facilitate inlining. * * Contents: * 1. Debugging/development routines * 2. Benchmark * 3. Unit tests * 4. Test driver * * This code is conditionally compiled, only when was * set in by the configure script, and that will only * happen on x86 platforms. When is not set, we * include some dummy code to silence compiler and ranlib warnings * about empty translation units and no symbols, and dummy drivers * that do nothing but declare success. */ #include #ifdef eslENABLE_AVX #include #include #include "easel.h" #include "esl_avx.h" /***************************************************************** * 1. Debugging/development routines *****************************************************************/ void esl_avx_dump_256i_hex4(__m256i v) { uint64_t *val = (uint64_t *) &v; printf("%016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n", val[3], val[2], val[1], val[0]); } /***************************************************************** * 2. Benchmark *****************************************************************/ #ifdef eslAVX_BENCHMARK #include #include #include #include #include "easel.h" #include "esl_avx.h" #include "esl_getopts.h" #include "esl_random.h" #include "esl_stopwatch.h" static ESL_OPTIONS options[] = { /* name type default env range toggles reqs incomp help docgroup*/ { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 }, { "-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to ", 0 }, { "-N", eslARG_INT, "200000000", NULL, NULL, NULL, NULL, NULL, "number of trials", 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }; static char usage[] = "[-options] "; static char banner[] = "benchmark driver for avx module"; int main(int argc, char **argv) { union { __m256i v; uint32_t x[8]; } u; ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_STOPWATCH *w = esl_stopwatch_Create(); char *fname = esl_opt_GetArg(go, 1); int N = esl_opt_GetInteger(go, "-N"); __m256i *v; int i,z; /* A bunch of vectors full of random numbers. Takes ~10-20s to generate the data */ v = malloc(sizeof(__m256i) * N); for (i = 0; i < N; i++) { for (z = 0; z < 8; z++) u.x[z] = esl_random_uint32(rng); v[i] = u.v; } esl_stopwatch_Start(w); if (strcmp(fname, "hmax_epi8") == 0) { int8_t r_i8, max_i8; max_i8 = -128; for (i = 0; i < N; i++) { r_i8 = esl_avx_hmax_epi8(v[i]); max_i8 = ESL_MAX(max_i8, r_i8); } printf("max_i8 = %" PRId8 "\n", max_i8); } else if (strcmp(fname, "hmax_epu8") == 0) { uint8_t r_u8, max_u8; max_u8 = 0; for (i = 0; i < N; i++) { r_u8 = esl_avx_hmax_epu8(v[i]); max_u8 = ESL_MAX(max_u8, r_u8); } printf("max_u8 = %" PRIu8 "\n", max_u8); } else if (strcmp(fname, "hmax_epi16") == 0) { int16_t r_i16, max_i16; max_i16 = -32768; for (i = 0; i < N; i++) { r_i16 = esl_avx_hmax_epi16(v[i]); max_i16 = ESL_MAX(max_i16, r_i16); } printf("max_i16 = %" PRIi16 "\n", max_i16); } else esl_fatal("No such esl_avx_* function %s\n", fname); esl_stopwatch_Stop(w); printf("# %s", fname); esl_stopwatch_Display(stdout, w, " CPU time: "); esl_randomness_Destroy(rng); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; } #endif /*eslAVX_BENCHMARK*/ /***************************************************************** * 3. Unit tests *****************************************************************/ #ifdef eslAVX_TESTDRIVE #include "esl_random.h" static void utest_hmax_epu8(ESL_RANDOMNESS *rng) { union { __m256i v; uint8_t x[32]; } u; uint8_t r1, r2; int i,z; for (i = 0; i < 100; i++) { r1 = 0; for (z = 0; z < 32; z++) { u.x[z] = (uint8_t) (esl_rnd_Roll(rng, 256)); // 0..255 if (u.x[z] > r1) r1 = u.x[z]; } r2 = esl_avx_hmax_epu8(u.v); if (r1 != r2) esl_fatal("hmax_epu8 utest failed"); } } static void utest_hmax_epi8(ESL_RANDOMNESS *rng) { union { __m256i v; int8_t x[32]; } u; int8_t r1, r2; int i,z; for (i = 0; i < 100; i++) { r1 = 0; for (z = 0; z < 32; z++) { u.x[z] = (int8_t) (esl_rnd_Roll(rng, 256) - 128); // -128..127 if (u.x[z] > r1) r1 = u.x[z]; } r2 = esl_avx_hmax_epi8(u.v); if (r1 != r2) esl_fatal("hmax_epi8 utest failed"); } } static void utest_hmax_epi16(ESL_RANDOMNESS *rng) { union { __m256i v; int16_t x[16]; } u; int16_t r1, r2; int i,z; for (i = 0; i < 100; i++) { r1 = -32768; for (z = 0; z < 16; z++) { u.x[z] = (int16_t) (esl_rnd_Roll(rng, 65536) - 32768); // -32768..32767 if (u.x[z] > r1) r1 = u.x[z]; } r2 = esl_avx_hmax_epi16(u.v); if (r1 != r2) esl_fatal("hmax_epi16 utest failed %d %d", r1, r2); } } #endif /*eslAVX_TESTDRIVE*/ /***************************************************************** * 4. Test driver *****************************************************************/ #ifdef eslAVX_TESTDRIVE #include #include #include #include "easel.h" #include "esl_getopts.h" #include "esl_random.h" #include "esl_avx.h" static ESL_OPTIONS options[] = { /* name type default env range toggles reqs incomp help docgroup*/ { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 }, { "-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to ", 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }; static char usage[] = "[-options]"; static char banner[] = "test driver for avx512 module"; int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));; fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_hmax_epu8(rng); utest_hmax_epi8(rng); utest_hmax_epi16(rng); fprintf(stderr, "# status = ok\n"); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return 0; } #endif /*eslAVX_TESTDRIVE*/ #else // !eslENABLE_AVX #include void esl_avx_silence_hack(void) { return; } #if defined eslAVX_TESTDRIVE || eslAVX_EXAMPLE || eslAVX_BENCHMARK int main(void) { fprintf(stderr, "# AVX support not compiled.\n"); return 0; } #endif #endif // eslENABLE_AVX