/* Copyright (C) 2015 William Hart Copyright (C) 2015 Fredrik Johansson Copyright (C) 2015 Kushagra Singh This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #define ulong ulongxx /* interferes with system includes */ #include #undef ulong #include "flint.h" #include "ulong_extras.h" /* Coefficients of Chebyshev's approximation polynomial (deg 2) {c0, c1, c2} splitting 0.5 to 1 into 8 equal intervals Values of these coefficients of Chebyshev's approximation polynomial have been calculated from the python module, "mpmath" - http://mpmath.org/ function call: mpmath.chebyfit(lambda x: mpmath.root(x,3), [i, j], 3, error=True) where (i, j) is the range */ static const float factor_table[] = {1.000000, 1.259921, 1.587401}; /* {1^(1/3), 2^(1/3), 4^(1/3)} */ /* c0 c1 c2 range */ static const float coeff[16][3] = {{0.445434042, 0.864136635, -0.335205926}, /* [0.50000, 0.53125] */ {0.454263239, 0.830878907, -0.303884962}, /* [0.53125, 0.56250] */ {0.462761624, 0.800647514, -0.276997626}, /* [0.56250, 0.59375] */ {0.470958569, 0.773024522, -0.253724515}, /* [0.59375, 0.62500] */ {0.478879482, 0.747667468, -0.233429710}, /* [0.62500, 0.65625] */ {0.486546506, 0.724292830, -0.215613166}, /* [0.65625, 0.68750] */ {0.493979069, 0.702663686, -0.199877008}, /* [0.68750, 0.71875] */ {0.501194325, 0.682580388, -0.185901247}, /* [0.71875, 0.75000] */ {0.508207500, 0.663873398, -0.173426009}, /* [0.75000, 0.78125] */ {0.515032183, 0.646397742, -0.162238357}, /* [0.78125, 0.81250] */ {0.521680556, 0.630028647, -0.152162376}, /* [0.81250, 0.84375] */ {0.528163588, 0.614658092, -0.143051642}, /* [0.84375, 0.87500] */ {0.534491194, 0.600192044, -0.134783425}, /* [0.87500, 0.90625] */ {0.540672371, 0.586548233, -0.127254189}, /* [0.90625, 0.93750] */ {0.546715310, 0.573654340, -0.120376066}, /* [0.93750, 0.96875] */ {0.552627494, 0.561446514, -0.114074068}}; /* [0.96875, 1.00000] */ mp_limb_t n_cbrt_chebyshev_approx(mp_limb_t n) { typedef union { mp_limb_t uword_val; #ifdef FLINT64 double double_val; #else float double_val; #endif } uni; int rem, mul; double factor, root, dec, dec2; mp_limb_t ret, expo, table_index; uni alias; /* upper_limit is the max cube root possible for one word */ #ifdef FLINT64 const mp_limb_t upper_limit = 2642245; /* 2642245 < (2^64)^(1/3) */ const mp_limb_t expo_mask = 0x7FF0000000000000; /* exponent bits in double */ const mp_limb_t mantissa_mask = 0x000FFFFFFFFFFFFF; /* mantissa bits in float */ const mp_limb_t table_mask = 0x000F000000000000; /* first 4 bits of mantissa */ const int mantissa_bits = 52; const mp_limb_t bias_hex = 0x3FE0000000000000; const int bias = 1022; alias.double_val = (double)n; #else const mp_limb_t upper_limit = 1625; /* 1625 < (2^32)^(1/3) */ const mp_limb_t expo_mask = 0x7F800000; /* exponent bits in float */ const mp_limb_t mantissa_mask = 0x007FFFFF; /* mantissa bits in float */ const mp_limb_t table_mask = 0x00780000; /* first 4 bits of mantissa */ const int mantissa_bits = 23; const mp_limb_t bias_hex = 0x3F000000; const int bias = 126; alias.double_val = (float)n; #endif expo = alias.uword_val & expo_mask; /* extracting exponent */ expo >>= mantissa_bits; expo -= bias; /* Subtracting bias */ /* extracting first 4 bits of mantissa, this will help select correct poly */ /* note mantissa of 0.5 is 0x0000000000000 not 0x1000000000000 */ table_index = alias.uword_val & table_mask; table_index >>= (mantissa_bits - 4); /* extracting decimal part, 0.5 <= dec <= 1 */ ret = alias.uword_val & mantissa_mask; ret |= bias_hex; alias.uword_val = ret; dec = alias.double_val; rem = expo % 3; expo /= 3; /* cube root of 2^expo */ factor = factor_table[rem]; /* select factor */ /* Calculating cube root of dec using chebyshev approximation polynomial */ /* Evaluating approx polynomial at (dec) by Estrin's scheme */ dec2 = dec*dec; root = (coeff[table_index][0] + coeff[table_index][1] * dec); root += (coeff[table_index][2] * dec2); mul = UWORD(1) << expo; /* mul = 2^expo */ root *= mul; /* dec^(1/3) * 2^(expo/3) */ root *= factor; /* root*= (expo%3)^(1/3) */ ret = root; /* In case ret^3 or (ret+1)^3 will cause overflow */ if (ret >= upper_limit) { if (n >= upper_limit * upper_limit * upper_limit) return upper_limit; ret = upper_limit - 1; } while (ret * ret * ret <= n) { (ret) += 1; if (ret == upper_limit) break; } while (ret * ret * ret > n) (ret) -= 1; return ret; }