/* Copyright (C) 2014 Fredrik Johansson This file is part of Arb. Arb is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include "arb.h" /* See verify_taylor.py for code to generate tables and proof of correctness */ #define TMP_ALLOC_LIMBS(size) TMP_ALLOC((size) * sizeof(mp_limb_t)) #define FACTORIAL_TAB_SIZE 288 ARB_DLL extern const mp_limb_t factorial_tab_numer[FACTORIAL_TAB_SIZE]; ARB_DLL extern const mp_limb_t factorial_tab_denom[FACTORIAL_TAB_SIZE]; void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly, int alternating) { mp_ptr s, t, xpow; mp_limb_t new_denom, old_denom, c; slong power, k, m; int cosorsin; TMP_INIT; TMP_START; if (2 * N >= FACTORIAL_TAB_SIZE - 1) { flint_printf("_arb_sin_cos_taylor_rs: N too large!\n"); flint_abort(); } if (N <= 1) { if (N == 0) { flint_mpn_zero(ysin, xn); if (!sinonly) flint_mpn_zero(ycos, xn); error[0] = 0; } else if (N == 1) { flint_mpn_copyi(ysin, x, xn); if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES); error[0] = 1; } } else { /* Choose m ~= sqrt(num_terms) (m must be even, >= 2) */ m = 2; while (m * m < N) m += 2; /* todo: merge allocations */ xpow = TMP_ALLOC_LIMBS((m + 1) * xn); s = TMP_ALLOC_LIMBS(xn + 2); t = TMP_ALLOC_LIMBS(2 * xn + 2); /* todo: 1 limb too much? */ /* higher index ---> */ /* | ---xn--- | */ /* xpow = | | x^m | x^(m-1) | ... | x^2 | x | */ #define XPOW_WRITE(__k) (xpow + (m - (__k)) * xn) #define XPOW_READ(__k) (xpow + (m - (__k) + 1) * xn) mpn_sqr(XPOW_WRITE(1), x, xn); mpn_sqr(XPOW_WRITE(2), XPOW_READ(1), xn); for (k = 4; k <= m; k += 2) { mpn_mul_n(XPOW_WRITE(k - 1), XPOW_READ(k / 2), XPOW_READ(k / 2 - 1), xn); mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn); } for (cosorsin = sinonly; cosorsin < 2; cosorsin++) { flint_mpn_zero(s, xn + 1); /* todo: skip one nonscalar multiplication (use x^m) when starting on x^0 */ power = (N - 1) % m; for (k = N - 1; k >= 0; k--) { c = factorial_tab_numer[2 * k + cosorsin]; new_denom = factorial_tab_denom[2 * k + cosorsin]; old_denom = factorial_tab_denom[2 * k + cosorsin + 2]; /* change denominators */ if (new_denom != old_denom && k < N - 1) { if (alternating && (k % 2 == 0)) s[xn] += old_denom; mpn_divrem_1(s, 0, s, xn + 1, old_denom); if (alternating && (k % 2 == 0)) s[xn] -= 1; } if (power == 0) { /* add c * x^0 -- only top limb is affected */ if (alternating & k) s[xn] -= c; else s[xn] += c; /* Outer polynomial evaluation: multiply by x^m */ if (k != 0) { mpn_mul(t, s, xn + 1, XPOW_READ(m), xn); flint_mpn_copyi(s, t + xn, xn + 1); } power = m - 1; } else { if (alternating & k) s[xn] -= mpn_submul_1(s, XPOW_READ(power), xn, c); else s[xn] += mpn_addmul_1(s, XPOW_READ(power), xn, c); power--; } } /* finally divide by denominator */ if (cosorsin == 0) { mpn_divrem_1(t, 0, s, xn + 1, factorial_tab_denom[0]); /* perturb down to a number < 1 if necessary. note that this does not invalidate the error bound: 1 - ulp is either 1 ulp too small or must be closer to the exact value */ if (t[xn] == 0) flint_mpn_copyi(ycos, t, xn); else flint_mpn_store(ycos, xn, LIMB_ONES); } else { mpn_divrem_1(s, 0, s, xn + 1, factorial_tab_denom[0]); mpn_mul(t, s, xn + 1, x, xn); flint_mpn_copyi(ysin, t + xn, xn); } } /* error bound (ulp) */ error[0] = 2; } TMP_END; }