/* Copyright (C) 2021 Fredrik Johansson This file is part of Arb. Arb is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include "arb.h" void arb_si_pow_ui(arb_t res, slong b, ulong e, slong prec) { arb_ui_pow_ui(res, UI_ABS_SI(b), e, prec); if ((e & 1) && b < 0) arb_neg(res, res); } void arb_set_round_ui(arb_t res, ulong lo, slong prec) { if (lo == 0) { arb_zero(res); } else { int inexact; inexact = _arf_set_round_ui(arb_midref(res), lo, 0, prec, ARB_RND); if (inexact) arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec); else mag_zero(arb_radref(res)); } } void arb_set_round_uiui(arb_t res, ulong hi, ulong lo, slong prec) { if (hi == 0 && lo == 0) { arb_zero(res); } else { int inexact; slong fix; inexact = _arf_set_round_uiui(arb_midref(res), &fix, hi, lo, 0, prec, ARB_RND); _fmpz_demote(ARF_EXPREF(arb_midref(res))); ARF_EXP(arb_midref(res)) = 2 * FLINT_BITS + fix; if (inexact) arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec); else mag_zero(arb_radref(res)); } } void arb_ui_pow_ui(arb_t res, ulong a, ulong exp, slong prec) { slong wp, aexp, awidth, trailing, wp_limbs; slong exp_fix, alloc, leading; int inexact, i, ebits; mp_ptr yman, tmp; mp_size_t yn; mp_limb_t yexp_hi, yexp_lo, aman, aodd, hi, lo; ARF_MUL_TMP_DECL if (exp <= 2) { if (exp == 0) { arb_one(res); } else if (exp == 1) { arb_set_round_ui(res, a, prec); } else { mp_limb_t hi, lo; umul_ppmm(hi, lo, a, a); arb_set_round_uiui(res, hi, lo, prec); } return; } if (a <= 1) { arb_set_ui(res, a); return; } aexp = FLINT_BIT_COUNT(a); count_trailing_zeros(trailing, a); awidth = aexp - trailing; /* a = power of two */ if (awidth == 1) { fmpz_t t; fmpz_init(t); fmpz_set_ui(t, exp); fmpz_mul_ui(t, t, aexp - 1); arb_one(res); arb_mul_2exp_fmpz(res, res, t); fmpz_clear(t); return; } umul_ppmm(hi, lo, awidth, exp); if (hi == 0) prec = FLINT_MIN(prec, lo); wp = prec + 2 * FLINT_BIT_COUNT(exp) + 4; wp_limbs = (wp + FLINT_BITS - 1) / FLINT_BITS; if (FLINT_BITS == 32 && wp_limbs % 2) wp_limbs++; /* Algorithm: as long as the result is exact, work with powers of the odd part of a as an integer. As soon as we exceed wp_limbs, switch to a floating-point product with exactly wp_limbs (it is possible that we get trailing zero limbs after this point so that the number of limbs could be reduced temporarily, but this is not worth the trouble). */ alloc = 4 * (wp_limbs + 2); ARF_MUL_TMP_ALLOC(tmp, alloc) yman = tmp + 2 * (wp_limbs + 2); /* a as a top-aligned mantissa */ aman = a << (FLINT_BITS - aexp); /* a as a bottom-aligned mantissa */ aodd = a >> trailing; /* y = a */ yn = 1; yman[0] = aodd; /* yexp will initially be the bottom exponent. we convert to a floating-point (top) exponent only when the result becomes inexact */ yexp_lo = trailing; /* the exponent can be two words wide (we only need this in the inexact case) */ yexp_hi = 0; inexact = 0; ebits = FLINT_BIT_COUNT(exp); for (i = ebits - 2; i >= 0; i--) { /* Integer case */ if (!inexact) { /* Inline code for small integers */ if (yn == 1) { yexp_lo *= 2; umul_ppmm(yman[1], yman[0], yman[0], yman[0]); yn += (yman[1] != 0); if (exp & (UWORD(1) << i)) { yexp_lo += trailing; if (yn == 1) { umul_ppmm(yman[1], yman[0], yman[0], aodd); yn += (yman[1] != 0); } else { mp_limb_t y0, y1; y0 = yman[0]; y1 = yman[1]; nn_mul_2x1(yman[2], yman[1], yman[0], y1, y0, aodd); yn += (yman[2] != 0); } } } else { /* todo: if 2 * yn is significantly larger than wp_limbs, we might want to go to the floating-point code here */ yexp_lo *= 2; mpn_sqr(tmp, yman, yn); yn = 2 * yn; yn -= (tmp[yn - 1] == 0); if (exp & (UWORD(1) << i)) { yexp_lo += trailing; yman[yn] = mpn_mul_1(yman, tmp, yn, aodd); yn += (yman[yn] != 0); } else { flint_mpn_copyi(yman, tmp, yn); } } /* convert to floating-point form */ /* todo: redundant if this is the last iteration */ if (yn > wp_limbs) { inexact = 1; count_leading_zeros(leading, yman[yn - 1]); yexp_lo = yexp_lo + yn * FLINT_BITS - leading; if (leading == 0) flint_mpn_copyi(yman, yman + yn - wp_limbs, wp_limbs); else mpn_rshift(yman, yman + yn - wp_limbs - 1, wp_limbs + 1, FLINT_BITS - leading); yn = wp_limbs; } continue; } /* y = y^2: exponent */ yexp_hi = (yexp_hi << 1) | (yexp_lo >> (FLINT_BITS - 1)); yexp_lo <<= 1; /* special case for 1-limb precision */ /* note: we must have yn == 1 here if wp_limbs == 1 */ if (wp_limbs == 1) { mp_limb_t hi, lo; /* y = y^2: mantissa */ umul_ppmm(hi, lo, yman[0], yman[0]); if (!(hi >> (FLINT_BITS - 1))) { hi = (hi << 1) | (lo >> (FLINT_BITS - 1)); sub_ddmmss(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, 1); } yman[0] = hi; if (exp & (UWORD(1) << i)) { /* y = y * a: exponent */ add_ssaaaa(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, aexp); /* y = y * a: mantissa */ umul_ppmm(hi, lo, yman[0], aman); if (!(hi >> (FLINT_BITS - 1))) { hi = (hi << 1) | (lo >> (FLINT_BITS - 1)); sub_ddmmss(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, 1); } yman[0] = hi; } continue; } /* y = y^2: mantissa */ if (yn >= 25 && yn <= 10000) /* use mpfr for sqrhi */ { mpfr_t zf, xf; zf->_mpfr_d = tmp; zf->_mpfr_prec = wp_limbs * FLINT_BITS; zf->_mpfr_sign = 1; zf->_mpfr_exp = 0; xf->_mpfr_d = yman; xf->_mpfr_prec = yn * FLINT_BITS; xf->_mpfr_sign = 1; xf->_mpfr_exp = 0; mpfr_sqr(zf, xf, MPFR_RNDD); if (zf->_mpfr_exp != 0) sub_ddmmss(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, 1); yn = wp_limbs; } else /* or exactly */ { mpn_sqr(tmp, yman, yn); yn = 2 * yn; } if (yn < wp_limbs) flint_abort(); /* y = y * a */ if (exp & (UWORD(1) << i)) { tmp[yn] = mpn_mul_1(tmp + yn - wp_limbs, tmp + yn - wp_limbs, wp_limbs, aman); yn++; add_ssaaaa(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, aexp); } /* move result from tmp to yman, and normalize; at this point there can be 0, 1 or 2 leading zeros */ if (tmp[yn - 1] >> (FLINT_BITS - 1)) { flint_mpn_copyi(yman, tmp + yn - wp_limbs, wp_limbs); } else { /* assumed so that we can read one extra limb with mpn_rshift */ /* yn == wp_limbs is only possible if we used mpfr_sqr above and there was no multiplication by a, but in that case there are 0 leading zeros anyway */ if (yn == wp_limbs) flint_abort(); if (tmp[yn - 1] >> (FLINT_BITS - 2)) { mpn_rshift(yman, tmp + yn - wp_limbs - 1, wp_limbs + 1, FLINT_BITS - 1); sub_ddmmss(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, 1); } else { mpn_rshift(yman, tmp + yn - wp_limbs - 1, wp_limbs + 1, FLINT_BITS - 2); sub_ddmmss(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, 2); } } yn = wp_limbs; } /* convert bottom exponent to floating-point (top) exponent */ if (!inexact) yexp_lo = yexp_lo + yn * FLINT_BITS; inexact |= _arf_set_round_mpn(arb_midref(res), &exp_fix, yman, yn, 0, prec, ARF_RND_DOWN); if (exp_fix) sub_ddmmss(yexp_hi, yexp_lo, yexp_hi, yexp_lo, 0, -exp_fix); fmpz_set_uiui(ARF_EXPREF(arb_midref(res)), yexp_hi, yexp_lo); if (inexact) arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec - 1); else mag_zero(arb_radref(res)); ARF_MUL_TMP_FREE(tmp, alloc) }