/* Copyright (C) 2009, 2011 William Hart This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #include #include #include #include "flint.h" #include "ulong_extras.h" #include "fft.h" int main(void) { flint_bitcnt_t depth, w, depth1, w1; clock_t start, end; double elapsed; double best = 0.0; mp_size_t best_off, off, best_d, best_w; FLINT_TEST_INIT(state); flint_printf("/* fft_tuning.h -- autogenerated by tune-fft */\n\n"); flint_printf("#ifndef FFT_TUNING_H\n"); flint_printf("#define FFT_TUNING_H\n\n"); flint_printf("#include \"gmp.h\"\n\n"); flint_printf("#define FFT_TAB \\\n"); fflush(stdout); _flint_rand_init_gmp(state); flint_printf(" { "); fflush(stdout); for (depth = 6; depth <= 10; depth++) { flint_printf("{ "); fflush(stdout); for (w = 1; w <= 2; w++) { int iters = 100*((mp_size_t) 1 << (3*(10 - depth)/2)), i; mp_size_t n = (UWORD(1)<gmp_state, b1); flint_mpn_urandomb(i2, state->gmp_state, b2); best_off = -1; for (off = 0; off <= 4; off++) { start = clock(); for (i = 0; i < iters; i++) mul_truncate_sqrt2(r1, i1, n1, i2, n2, depth - off, w*((mp_size_t)1 << (off*2))); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; if (elapsed < best || best_off == -1) { best_off = off; best = elapsed; } } flint_printf("%wd", best_off); if (w != 2) flint_printf(","); flint_printf(" "); fflush(stdout); flint_free(i1); } flint_printf("}"); if (depth != 10) flint_printf(","); flint_printf(" "); fflush(stdout); } flint_printf("}\n\n"); best_d = 12; best_w = 1; best_off = -1; flint_printf("#define MULMOD_TAB \\\n"); fflush(stdout); flint_printf(" { "); fflush(stdout); for (depth = 12; best_off != 1 ; depth++) { for (w = 1; w <= 2; w++) { int iters, i; mp_size_t n = (UWORD(1)<gmp_state, int_limbs*FLINT_BITS); flint_mpn_urandomb(i2, state->gmp_state, int_limbs*FLINT_BITS); i1[int_limbs] = 0; i2[int_limbs] = 0; depth1 = FLINT_CLOG2(bits); depth1 = depth1/2; w1 = bits/(UWORD(1)<<(2*depth1)); best_off = -1; for (off = 0; off <= 4; off++) { start = clock(); for (i = 0; i < iters; i++) _fft_mulmod_2expp1(r1, i1, i2, int_limbs, depth1 - off, w1*((mp_size_t)1 << (off*2))); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; if (best_off == -1 || elapsed < best) { best_off = off; best = elapsed; } } start = clock(); for (i = 0; i < iters; i++) flint_mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; if (elapsed < best) { best_d = depth + (w == 2); best_w = w + 1 - 2*(w == 2); } flint_printf("%wd", best_off); if (w != 2) flint_printf(", "); fflush(stdout); flint_free(i1); } flint_printf(", "); fflush(stdout); } flint_printf("1 }\n\n"); flint_printf("#define FFT_N_NUM %wd\n\n", 2*(depth - 12) + 1); flint_printf("#define FFT_MULMOD_2EXPP1_CUTOFF %wd\n\n", ((mp_limb_t) 1 << best_d)*best_w/(2*FLINT_BITS)); flint_randclear(state); flint_printf("#endif\n"); return 0; }