/*
Copyright (C) 2015 William Hart
Copyright (C) 2015 Fredrik Johansson
Copyright (C) 2015 Kushagra Singh
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See .
*/
#include
#define ulong ulongxx /* interferes with system includes */
#include
#undef ulong
#include "flint.h"
#include "ulong_extras.h"
/* Coefficients of Chebyshev's approximation polynomial (deg 2) {c0, c1, c2}
splitting 0.5 to 1 into 8 equal intervals
Values of these coefficients of Chebyshev's approximation polynomial have been
calculated from the python module, "mpmath" - http://mpmath.org/
function call:
mpmath.chebyfit(lambda x: mpmath.root(x,3), [i, j], 3, error=True)
where (i, j) is the range */
static const float factor_table[] = {1.000000, 1.259921, 1.587401}; /* {1^(1/3), 2^(1/3), 4^(1/3)} */
/* c0 c1 c2 range */
static const float coeff[16][3] = {{0.445434042, 0.864136635, -0.335205926}, /* [0.50000, 0.53125] */
{0.454263239, 0.830878907, -0.303884962}, /* [0.53125, 0.56250] */
{0.462761624, 0.800647514, -0.276997626}, /* [0.56250, 0.59375] */
{0.470958569, 0.773024522, -0.253724515}, /* [0.59375, 0.62500] */
{0.478879482, 0.747667468, -0.233429710}, /* [0.62500, 0.65625] */
{0.486546506, 0.724292830, -0.215613166}, /* [0.65625, 0.68750] */
{0.493979069, 0.702663686, -0.199877008}, /* [0.68750, 0.71875] */
{0.501194325, 0.682580388, -0.185901247}, /* [0.71875, 0.75000] */
{0.508207500, 0.663873398, -0.173426009}, /* [0.75000, 0.78125] */
{0.515032183, 0.646397742, -0.162238357}, /* [0.78125, 0.81250] */
{0.521680556, 0.630028647, -0.152162376}, /* [0.81250, 0.84375] */
{0.528163588, 0.614658092, -0.143051642}, /* [0.84375, 0.87500] */
{0.534491194, 0.600192044, -0.134783425}, /* [0.87500, 0.90625] */
{0.540672371, 0.586548233, -0.127254189}, /* [0.90625, 0.93750] */
{0.546715310, 0.573654340, -0.120376066}, /* [0.93750, 0.96875] */
{0.552627494, 0.561446514, -0.114074068}}; /* [0.96875, 1.00000] */
mp_limb_t
n_cbrt_chebyshev_approx(mp_limb_t n)
{
typedef union {
mp_limb_t uword_val;
#ifdef FLINT64
double double_val;
#else
float double_val;
#endif
} uni;
int rem, mul;
double factor, root, dec, dec2;
mp_limb_t ret, expo, table_index;
uni alias;
/* upper_limit is the max cube root possible for one word */
#ifdef FLINT64
const mp_limb_t upper_limit = 2642245; /* 2642245 < (2^64)^(1/3) */
const mp_limb_t expo_mask = 0x7FF0000000000000; /* exponent bits in double */
const mp_limb_t mantissa_mask = 0x000FFFFFFFFFFFFF; /* mantissa bits in float */
const mp_limb_t table_mask = 0x000F000000000000; /* first 4 bits of mantissa */
const int mantissa_bits = 52;
const mp_limb_t bias_hex = 0x3FE0000000000000;
const int bias = 1022;
alias.double_val = (double)n;
#else
const mp_limb_t upper_limit = 1625; /* 1625 < (2^32)^(1/3) */
const mp_limb_t expo_mask = 0x7F800000; /* exponent bits in float */
const mp_limb_t mantissa_mask = 0x007FFFFF; /* mantissa bits in float */
const mp_limb_t table_mask = 0x00780000; /* first 4 bits of mantissa */
const int mantissa_bits = 23;
const mp_limb_t bias_hex = 0x3F000000;
const int bias = 126;
alias.double_val = (float)n;
#endif
expo = alias.uword_val & expo_mask; /* extracting exponent */
expo >>= mantissa_bits;
expo -= bias; /* Subtracting bias */
/* extracting first 4 bits of mantissa, this will help select correct poly */
/* note mantissa of 0.5 is 0x0000000000000 not 0x1000000000000 */
table_index = alias.uword_val & table_mask;
table_index >>= (mantissa_bits - 4);
/* extracting decimal part, 0.5 <= dec <= 1 */
ret = alias.uword_val & mantissa_mask;
ret |= bias_hex;
alias.uword_val = ret;
dec = alias.double_val;
rem = expo % 3;
expo /= 3; /* cube root of 2^expo */
factor = factor_table[rem]; /* select factor */
/* Calculating cube root of dec using chebyshev approximation polynomial */
/* Evaluating approx polynomial at (dec) by Estrin's scheme */
dec2 = dec*dec;
root = (coeff[table_index][0] + coeff[table_index][1] * dec);
root += (coeff[table_index][2] * dec2);
mul = UWORD(1) << expo; /* mul = 2^expo */
root *= mul; /* dec^(1/3) * 2^(expo/3) */
root *= factor; /* root*= (expo%3)^(1/3) */
ret = root;
/* In case ret^3 or (ret+1)^3 will cause overflow */
if (ret >= upper_limit)
{
if (n >= upper_limit * upper_limit * upper_limit)
return upper_limit;
ret = upper_limit - 1;
}
while (ret * ret * ret <= n)
{
(ret) += 1;
if (ret == upper_limit)
break;
}
while (ret * ret * ret > n)
(ret) -= 1;
return ret;
}