/*
Copyright (C) 2012, 2021 Fredrik Johansson
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See .
*/
#include
#include "flint.h"
#include "fmpz.h"
#include "fmpz_poly.h"
/* Improve cache locality with huge coefficients */
#define BLOCK_SIZE 32
void
_fmpz_poly_taylor_shift_horner(fmpz * poly, const fmpz_t c, slong n)
{
slong i, j, bits, out_bits;
if (n <= 1 || fmpz_is_zero(c))
return;
if (!fmpz_is_one(c))
{
if (n <= 4 || (*c != WORD(-1) && n <= 10))
{
if (*c == WORD(-1))
{
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
fmpz_sub(poly + j, poly + j, poly + j + 1);
}
else
{
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
fmpz_addmul(poly + j, poly + j + 1, c);
}
}
else
{
slong i;
fmpz_t d, one;
*one = 1;
if (*c == WORD(-1))
{
for (i = 1; i < n; i += 2)
fmpz_neg(poly + i, poly + i);
_fmpz_poly_taylor_shift_horner(poly, one, n);
for (i = 1; i < n; i += 2)
fmpz_neg(poly + i, poly + i);
}
else
{
fmpz_init(d);
for (i = 1; i < n; i++)
{
if (i == 1)
fmpz_set(d, c);
else
fmpz_mul(d, d, c);
fmpz_mul(poly + i, poly + i, d);
}
_fmpz_poly_taylor_shift_horner(poly, one, n);
for (i = n - 1; i >= 1; i--)
{
fmpz_divexact(poly + i, poly + i, d);
if (i >= 3)
fmpz_divexact(d, d, c);
else if (i == 2)
fmpz_set(d, c);
}
fmpz_clear(d);
}
}
return;
}
bits = _fmpz_vec_max_bits(poly, n);
bits = FLINT_ABS(bits);
out_bits = bits + n + 1;
if (out_bits <= FLINT_BITS - 2)
{
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
poly[j] += poly[j + 1];
}
else if (n >= 5 && out_bits < 2 * FLINT_BITS)
{
mp_ptr t;
TMP_INIT;
TMP_START;
t = TMP_ALLOC(2 * n * sizeof(mp_limb_t));
for (i = 0; i < n; i++)
fmpz_get_signed_uiui(t + 2*i + 1, t + 2*i + 0, poly + i);
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
add_ssaaaa(t[2*j + 1], t[2*j], t[2*j + 1], t[2*j],
t[2*(j + 1) + 1], t[2*(j + 1)]);
for (i = 0; i < n; i++)
fmpz_set_signed_uiui(poly + i, t[2*i + 1], t[2*i]);
TMP_END;
}
else if (n >= 3 + FLINT_BIT_COUNT(bits) && bits <= 100 * FLINT_BITS)
{
slong B = BLOCK_SIZE, ii, jj, d;
mp_ptr t;
TMP_INIT;
TMP_START;
d = (out_bits + FLINT_BITS - 1) / FLINT_BITS;
t = TMP_ALLOC(d * n * sizeof(mp_limb_t));
for (i = 0; i < n; i++)
fmpz_get_signed_ui_array(t + d * i, d, poly + i);
if (d == 3)
{
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
add_sssaaaaaa(t[3 * j + 2], t[3 * j + 1], t[3 * j],
t[3 * j + 2], t[3 * j + 1], t[3 * j],
t[3 * (j + 1) + 2], t[3 * (j + 1) + 1], t[3 * (j + 1)]);
}
else if (n < 2 * B)
{
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
mpn_add_n(t + d * j, t + d * j, t + d * (j + 1), d);
}
else
{
for (ii = n - 2; ii >= 0; ii -= B)
for (jj = 0; jj < n - ii - 1; jj += B)
for (i = ii; i >= FLINT_MAX(ii - B + 1, 0); i--)
for (j = jj; j < FLINT_MIN(jj + B, n - i - 1); j++)
mpn_add_n(t + d * (i + j), t + d * (i + j), t + d * (i + j + 1), d);
}
for (i = 0; i < n; i++)
fmpz_set_signed_ui_array(poly + i, t + d * i, d);
TMP_END;
}
else
{
slong B = BLOCK_SIZE, ii, jj;
if (n < 2 * B)
{
for (i = n - 2; i >= 0; i--)
for (j = i; j < n - 1; j++)
fmpz_add(poly + j, poly + j, poly + j + 1);
}
else
{
for (ii = n - 2; ii >= 0; ii -= B)
for (jj = 0; jj < n - ii - 1; jj += B)
for (i = ii; i >= FLINT_MAX(ii - B + 1, 0); i--)
for (j = jj; j < FLINT_MIN(jj + B, n - i - 1); j++)
fmpz_add(poly + i + j, poly + i + j, poly + i + j + 1);
}
}
}
void
fmpz_poly_taylor_shift_horner(fmpz_poly_t g, const fmpz_poly_t f,
const fmpz_t c)
{
if (f != g)
fmpz_poly_set(g, f);
_fmpz_poly_taylor_shift_horner(g->coeffs, c, g->length);
}