/* Copyright (C) 2014 Fredrik Johansson This file is part of Arb. Arb is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include "arb.h" #define TMP_ALLOC_LIMBS(__n) TMP_ALLOC((__n) * sizeof(mp_limb_t)) #define MAGLIM(prec) FLINT_MAX(128, 2 * (prec)) void arb_exp_arf_huge(arb_t z, const arf_t x, slong mag, slong prec, int minus_one) { arb_t ln2, t, u; fmpz_t q; slong wp; arb_init(ln2); arb_init(t); arb_init(u); fmpz_init(q); wp = prec + mag + 10; arb_const_log2(ln2, wp); arb_set_arf(t, x); arb_div(u, t, ln2, wp); arf_get_fmpz(q, arb_midref(u), ARF_RND_DOWN); arb_submul_fmpz(t, ln2, q, wp); arb_exp(z, t, prec); arb_mul_2exp_fmpz(z, z, q); if (minus_one) arb_sub_ui(z, z, 1, prec); arb_clear(ln2); arb_clear(t); arb_clear(u); fmpz_clear(q); } /* |x| >= 2^expbound */ static void arb_exp_arf_overflow(arb_t z, slong expbound, int negative, int minus_one, slong prec) { if (!negative) { arf_zero(arb_midref(z)); mag_inf(arb_radref(z)); } else { /* x <= -2^expbound ==> 0 < exp(x) <= 2^(-2^expbound) */ fmpz_t t; fmpz_init(t); fmpz_set_si(t, -1); fmpz_mul_2exp(t, t, expbound); arf_one(arb_midref(z)); mag_one(arb_radref(z)); arb_mul_2exp_fmpz(z, z, t); if (minus_one) arb_sub_ui(z, z, 1, prec); fmpz_clear(t); } } static void arb_exp_arf_fallback(arb_t z, const arf_t x, slong mag, slong prec, int minus_one) { /* reduce by log(2) if needed, but avoid computing log(2) unnecessarily at extremely high precision */ if (mag > 64 || (mag > 8 && prec < 1000000)) arb_exp_arf_huge(z, x, mag, prec, minus_one); else if (prec < 19000) arb_exp_arf_rs_generic(z, x, prec, minus_one); else arb_exp_arf_bb(z, x, prec, minus_one); } void arb_exp_arf(arb_t z, const arf_t x, slong prec, int minus_one, slong maglim) { if (arf_is_special(x)) { if (minus_one) { if (arf_is_zero(x)) arb_zero(z); else if (arf_is_pos_inf(x)) arb_pos_inf(z); else if (arf_is_neg_inf(x)) arb_set_si(z, -1); else arb_indeterminate(z); } else { if (arf_is_zero(x)) arb_one(z); else if (arf_is_pos_inf(x)) arb_pos_inf(z); else if (arf_is_neg_inf(x)) arb_zero(z); else arb_indeterminate(z); } } else if (COEFF_IS_MPZ(ARF_EXP(x))) { if (fmpz_sgn(ARF_EXPREF(x)) > 0) { /* huge input */ arb_exp_arf_overflow(z, maglim, ARF_SGNBIT(x), minus_one, prec); } else { /* |exp(x) - (1 + x)| <= |x^2| */ fmpz_t t; int inexact; fmpz_init(t); fmpz_mul_2exp(t, ARF_EXPREF(x), 1); inexact = arf_add_ui(arb_midref(z), x, minus_one ? 0 : 1, prec, ARB_RND); mag_one(arb_radref(z)); mag_mul_2exp_fmpz(arb_radref(z), arb_radref(z), t); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); fmpz_clear(t); } } else { slong exp, wp, wn, N, r, wprounded, finaln; fmpz_t n; mp_ptr tmp, w, t, u, finalvalue; mp_limb_t p1, q1bits, p2, q2bits, error, error2; int negative, inexact; TMP_INIT; exp = ARF_EXP(x); negative = ARF_SGNBIT(x); /* handle tiny input */ /* |exp(x) - 1| <= 2|x| */ if (!minus_one && exp < -prec - 4) { arf_one(arb_midref(z)); mag_set_ui_2exp_si(arb_radref(z), 1, exp + 1); return; } /* |exp(x) - (1 + x)| <= |x^2| */ else if (exp < (minus_one ? -prec - 4 : -(prec / 2) - 4)) { inexact = arf_add_ui(arb_midref(z), x, minus_one ? 0 : 1, prec, ARB_RND); mag_set_ui_2exp_si(arb_radref(z), 1, 2 * exp); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); return; } /* handle huge input */ if (exp > maglim) { arb_exp_arf_overflow(z, maglim, negative, minus_one, prec); return; } /* Absolute working precision (NOT rounded to a limb multiple) */ wp = prec + 8; if (minus_one && exp <= 0) wp += (-exp); /* Number of limbs */ wn = (wp + FLINT_BITS - 1) / FLINT_BITS; /* Precision rounded to a number of bits */ wprounded = FLINT_BITS * wn; /* Don't be close to the boundary (to allow adding adding the Taylor series truncation error without overflow) */ wp = FLINT_MAX(wp, wprounded - (FLINT_BITS - 4)); /* Too high precision to use table -- use generic algorithm */ if (wp > ARB_EXP_TAB2_PREC) { arb_exp_arf_fallback(z, x, exp, prec, minus_one); return; } TMP_START; tmp = TMP_ALLOC_LIMBS(4 * wn + 3); w = tmp; /* requires wn+1 limbs */ t = w + wn + 1; /* requires wn+1 limbs */ u = t + wn + 1; /* requires 2wn+1 limbs */ /* reduce modulo log(2) */ fmpz_init(n); if (_arb_get_mpn_fixed_mod_log2(w, n, &error, x, wn) == 0) { /* may run out of precision for log(2) */ arb_exp_arf_fallback(z, x, exp, prec, minus_one); fmpz_clear(n); TMP_END; return; } /* err(w) translates to a propagated error bounded by err(w) * exp'(x) < err(w) * exp(1) < err(w) * 3 */ error *= 3; /* Table-based argument reduction (1 or 2 steps) */ if (wp <= ARB_EXP_TAB1_PREC) { q1bits = ARB_EXP_TAB1_BITS; q2bits = 0; p1 = w[wn-1] >> (FLINT_BITS - q1bits); w[wn-1] -= (p1 << (FLINT_BITS - q1bits)); p2 = 0; } else { q1bits = ARB_EXP_TAB21_BITS; q2bits = ARB_EXP_TAB21_BITS + ARB_EXP_TAB22_BITS; p1 = w[wn-1] >> (FLINT_BITS - q1bits); w[wn-1] -= (p1 << (FLINT_BITS - q1bits)); p2 = w[wn-1] >> (FLINT_BITS - q2bits); w[wn-1] -= (p2 << (FLINT_BITS - q2bits)); } /* |w| <= 2^-r */ r = _arb_mpn_leading_zeros(w, wn); /* Choose number of terms N such that Taylor series truncation error is <= 2^-wp */ N = _arb_exp_taylor_bound(-r, wp); if (N < 60) { /* Evaluate Taylor series */ _arb_exp_taylor_rs(t, &error2, w, wn, N); /* Taylor series evaluation error */ error += error2; /* Taylor series truncation error */ error += UWORD(1) << (wprounded-wp); } else /* Compute cosh(a) from sinh(a) using a square root. */ { /* the summation for sinh is actually done to (2N-1)! */ N = (N + 1) / 2; /* Evaluate Taylor series for sinh */ _arb_sin_cos_taylor_rs(t, u, &error2, w, wn, N, 1, 0); error += error2; error += UWORD(1) << (wprounded-wp); /* 1 + sinh^2, with wn + 1 limbs */ mpn_sqr(u, t, wn); u[2 * wn] = 1; /* cosh, with wn + 1 limbs */ mpn_sqrtrem(w, u, u, 2 * wn + 1); /* exp = sinh + cosh */ t[wn] = w[wn] + mpn_add_n(t, t, w, wn); /* Error for cosh */ /* When converting sinh to cosh, the error for cosh must be smaller than the error for sinh; but we also get 1 ulp extra error from the square root. */ error2 = error + 1; /* Error for sinh + cosh */ error += error2; } if (wp <= ARB_EXP_TAB1_PREC) { if (p1 == 0) { finalvalue = t; finaln = wn + 1; } else { /* Divide by 2 to get |t| <= 1 (todo: check this?) */ mpn_rshift(t, t, wn + 1, 1); error = (error >> 1) + 2; mpn_mul_n(u, t, arb_exp_tab1[p1] + ARB_EXP_TAB1_LIMBS - wn, wn); /* (t + err1 * ulp) * (u + err2 * ulp) + 1ulp = t*u + (err1*u + err2*t + t*u*ulp + 1) * ulp note |u| <= 1, |t| <= 1 */ error += 4; finalvalue = u + wn; finaln = wn; /* we have effectively divided by 2^2 -- todo use inline function */ fmpz_add_ui(n, n, 2); } } else { if (p1 == 0 && p2 == 0) { finalvalue = t; finaln = wn + 1; } else { /* Divide by 2 to get |t| <= 1 (todo: check this?) */ mpn_rshift(t, t, wn + 1, 1); error = (error >> 1) + 2; mpn_mul_n(u, arb_exp_tab21[p1] + ARB_EXP_TAB2_LIMBS - wn, arb_exp_tab22[p2] + ARB_EXP_TAB2_LIMBS - wn, wn); /* error of w <= 4 ulp */ flint_mpn_copyi(w, u + wn, wn); /* todo: avoid with better alloc */ mpn_mul_n(u, t, w, wn); /* (t + err1 * ulp) * (w + 4 * ulp) + 1ulp = t*u + (err1*w + 4*t + t*w*ulp + 1) * ulp note |w| <= 1, |t| <= 1 */ error += 6; finalvalue = u + wn; finaln = wn; /* we have effectively divided by 2^3 -- todo use inline function */ fmpz_add_ui(n, n, 3); } } /* The accumulated arithmetic error */ mag_set_ui_2exp_si(arb_radref(z), error, -wprounded); /* Set the midpoint */ if (!minus_one) { inexact = _arf_set_mpn_fixed(arb_midref(z), finalvalue, finaln, wn, 0, prec, ARB_RND); if (inexact) arf_mag_add_ulp(arb_radref(z), arb_radref(z), arb_midref(z), prec); } else { _arf_set_mpn_fixed(arb_midref(z), finalvalue, finaln, wn, 0, finaln * FLINT_BITS, ARB_RND); } arb_mul_2exp_fmpz(z, z, n); if (minus_one) arb_sub_ui(z, z, 1, prec); TMP_END; fmpz_clear(n); } } /* todo: min prec by MAG_BITS everywhere? */ void arb_exp_wide(arb_t res, const arb_t x, slong prec, slong maglim) { mag_t t, u; mag_init(t); mag_init(u); if (arf_cmpabs_2exp_si(arb_midref(x), 20) < 0 && mag_cmp_2exp_si(arb_radref(x), 20) < 0) { if (arf_is_zero(arb_midref(x))) { if (mag_cmp_2exp_si(arb_radref(x), -10) < 0) { mag_expm1(arb_radref(res), arb_radref(x)); arf_one(arb_midref(res)); } else { mag_expinv_lower(t, arb_radref(x)); mag_exp(u, arb_radref(x)); arb_set_interval_mag(res, t, u, prec); } } else if (arb_contains_zero(x)) { arf_get_mag_lower(t, arb_midref(x)); mag_sub(t, arb_radref(x), t); arf_get_mag(u, arb_midref(x)); mag_add(u, arb_radref(x), u); if (arf_sgn(arb_midref(x)) > 0) { mag_expinv_lower(t, t); mag_exp(u, u); arb_set_interval_mag(res, t, u, prec); } else { mag_expinv_lower(u, u); mag_exp(t, t); arb_set_interval_mag(res, u, t, prec); } } else if (arf_sgn(arb_midref(x)) < 0) { arb_get_mag(t, x); arb_get_mag_lower(u, x); mag_expinv_lower(t, t); mag_expinv(u, u); arb_set_interval_mag(res, t, u, prec); } else { arb_get_mag_lower(t, x); arb_get_mag(u, x); mag_exp_lower(t, t); mag_exp(u, u); arb_set_interval_mag(res, t, u, prec); } } else { /* use arb_exp_arf for accurate argument reduction */ arf_t q; arf_init(q); arf_set_mag(q, arb_radref(x)); arf_add(q, q, arb_midref(x), MAG_BITS, ARF_RND_CEIL); arb_exp_arf(res, q, FLINT_MIN(prec, MAG_BITS), 0, maglim); arb_get_mag(arb_radref(res), res); arf_zero(arb_midref(res)); arf_clear(q); } mag_clear(t); mag_clear(u); } void arb_exp(arb_t res, const arb_t x, slong prec) { slong maglim = MAGLIM(prec); if (mag_is_zero(arb_radref(x))) { arb_exp_arf(res, arb_midref(x), prec, 0, maglim); } else if (mag_is_inf(arb_radref(x))) { if (arf_is_nan(arb_midref(x))) arb_indeterminate(res); else arb_zero_pm_inf(res); } else if (arf_is_special(arb_midref(x))) { if (arf_is_zero(arb_midref(x))) arb_exp_wide(res, x, prec, maglim); else if (arf_is_nan(arb_midref(x))) arb_indeterminate(res); else /* infinity +/- finite */ arb_exp_arf(res, arb_midref(x), prec, 0, 1); } else /* both finite, non-special */ { slong acc, mexp, rexp; mexp = ARF_EXP(arb_midref(x)); rexp = MAG_EXP(arb_radref(x)); if (COEFF_IS_MPZ(rexp)) rexp = (fmpz_sgn(ARF_EXPREF(arb_radref(x))) < 0) ? COEFF_MIN : COEFF_MAX; if (COEFF_IS_MPZ(mexp)) mexp = (fmpz_sgn(MAG_EXPREF(arb_midref(x))) < 0) ? COEFF_MIN : COEFF_MAX; if (mexp < -prec && rexp < -prec) { arb_get_mag(arb_radref(res), x); mag_expm1(arb_radref(res), arb_radref(res)); arf_one(arb_midref(res)); return; } acc = -rexp; acc = FLINT_MAX(acc, 0); acc = FLINT_MIN(acc, prec); prec = FLINT_MIN(prec, acc + MAG_BITS); prec = FLINT_MAX(prec, 2); if (acc < 20 && (rexp >= 0 || mexp <= 10)) { /* may evaluate at endpoints */ arb_exp_wide(res, x, prec, maglim); } else { /* exp(a+b) - exp(a) = exp(a) * (exp(b)-1) */ mag_t t, u; mag_init_set(t, arb_radref(x)); mag_init(u); arb_exp_arf(res, arb_midref(x), prec, 0, maglim); mag_expm1(t, t); arb_get_mag(u, res); mag_addmul(arb_radref(res), t, u); mag_clear(t); mag_clear(u); } } } void arb_expm1(arb_t res, const arb_t x, slong prec) { slong maglim = MAGLIM(prec); if (mag_is_zero(arb_radref(x))) { arb_exp_arf(res, arb_midref(x), prec, 1, maglim); } else if (mag_is_inf(arb_radref(x))) { if (arf_is_nan(arb_midref(x))) arb_indeterminate(res); else arb_zero_pm_inf(res); } else if (arf_is_special(arb_midref(x))) { if (arf_is_zero(arb_midref(x))) { if (mag_cmp_2exp_si(arb_radref(x), -10) < 0) { mag_expm1(arb_radref(res), arb_radref(x)); arf_zero(arb_midref(res)); } else { arb_exp_wide(res, x, prec, maglim); arb_sub_ui(res, res, 1, prec); } } else if (arf_is_nan(arb_midref(x))) arb_indeterminate(res); else /* infinity +/- finite */ arb_exp_arf(res, arb_midref(x), prec, 1, 1); } else /* both finite, non-special */ { if (arf_cmpabs_2exp_si(arb_midref(x), 3) < 0 && mag_cmp_2exp_si(arb_radref(x), -3) < 0) { mag_t t, u, one; slong acc, mexp, rexp; mexp = ARF_EXP(arb_midref(x)); rexp = MAG_EXP(arb_radref(x)); if (COEFF_IS_MPZ(rexp)) rexp = (fmpz_sgn(ARF_EXPREF(arb_radref(x))) < 0) ? COEFF_MIN : COEFF_MAX; if (COEFF_IS_MPZ(mexp)) mexp = (fmpz_sgn(MAG_EXPREF(arb_midref(x))) < 0) ? COEFF_MIN : COEFF_MAX; acc = FLINT_MIN(mexp, 0) - rexp; acc = FLINT_MAX(acc, 0); acc = FLINT_MIN(acc, prec); prec = FLINT_MIN(prec, acc + MAG_BITS); prec = FLINT_MAX(prec, 2); /* [exp(a+b) - 1] - [exp(a) - 1] = exp(a) * (exp(b)-1) */ mag_init_set(t, arb_radref(x)); mag_init(u); mag_init(one); mag_one(one); if (arf_sgn(arb_midref(x)) >= 0) { arb_exp_arf(res, arb_midref(x), prec, 1, maglim); arb_get_mag(u, res); mag_add(u, u, one); } else { arb_exp_arf(res, arb_midref(x), prec, 1, maglim); arb_get_mag_lower(u, res); mag_sub(u, one, u); } mag_expm1(t, t); mag_addmul(arb_radref(res), t, u); mag_clear(t); mag_clear(u); mag_clear(one); } else { arb_exp(res, x, prec); arb_sub_ui(res, res, 1, prec); } } } void arb_exp_invexp(arb_t res, arb_t res2, const arb_t x, slong prec) { slong maglim = MAGLIM(prec); if (arf_is_special(arb_midref(x)) || mag_is_special(arb_radref(x))) { /* [c +/- 0] */ if (arf_is_finite(arb_midref(x)) && mag_is_zero(arb_radref(x))) { arb_exp_arf(res, arb_midref(x), prec, 0, maglim); arb_inv(res2, res, prec); } /* [nan +/- ?] */ else if (arf_is_nan(arb_midref(x))) { arb_indeterminate(res); arb_indeterminate(res2); } /* [c +/- inf] */ else if (mag_is_inf(arb_radref(x))) { arb_zero_pm_inf(res); arb_zero_pm_inf(res2); } /* [+inf +/- c] */ else if (arf_is_pos_inf(arb_midref(x))) { arb_pos_inf(res); arb_zero(res2); } /* [-inf +/- c] */ else if (arf_is_neg_inf(arb_midref(x))) { arb_zero(res); arb_pos_inf(res2); } else { arb_t t; arb_init(t); arb_neg(t, x); arb_exp_wide(res, x, prec, maglim); arb_exp_wide(res2, t, prec, maglim); arb_clear(t); } } else /* both finite, non-special */ { slong acc, mexp, rexp; mexp = ARF_EXP(arb_midref(x)); rexp = MAG_EXP(arb_radref(x)); if (COEFF_IS_MPZ(rexp)) rexp = (fmpz_sgn(ARF_EXPREF(arb_radref(x))) < 0) ? COEFF_MIN : COEFF_MAX; if (COEFF_IS_MPZ(mexp)) mexp = (fmpz_sgn(MAG_EXPREF(arb_midref(x))) < 0) ? COEFF_MIN : COEFF_MAX; if (mexp < -prec && rexp < -prec) { arb_get_mag(arb_radref(res), x); mag_expm1(arb_radref(res), arb_radref(res)); arf_one(arb_midref(res)); arb_set(res2, res); return; } acc = -rexp; acc = FLINT_MAX(acc, 0); acc = FLINT_MIN(acc, prec); prec = FLINT_MIN(prec, acc + MAG_BITS); prec = FLINT_MAX(prec, 2); if (acc < 20 && (rexp >= 0 || mexp <= 10)) { /* may evaluate at endpoints */ arb_t t; arb_init(t); arb_neg(t, x); arb_exp_wide(res, x, prec, maglim); arb_exp_wide(res2, t, prec, maglim); arb_clear(t); } else { /* exp(a+b) - exp(a) = exp(a) * (exp(b)-1) */ mag_t t, u; mag_init_set(t, arb_radref(x)); mag_init(u); arb_exp_arf(res, arb_midref(x), prec, 0, maglim); arb_inv(res2, res, prec); mag_expm1(t, t); arb_get_mag(u, res); mag_addmul(arb_radref(res), t, u); arb_get_mag(u, res2); mag_addmul(arb_radref(res2), t, u); mag_clear(t); mag_clear(u); } } }