/*
Copyright (C) 2011 Sebastian Pancratz
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/
#include
#include "flint.h"
#include "fmpz_mod_poly.h"
/*
    Scratch-space bound for the recursion below.

    Let i be such that 2^{i} < len1 <= 2^{i+1}.

    The jth step of the recursion requires temporary space of size no
    more than (len2 - 1)(2^j - 1) + 1.  The smallest step, j = 0, does
    not require any temporary space, and the largest step has j = i.
    Hence the total is

        sum_{j=1}^i [(len2 - 1) (2^j - 1) + 1]
            = (len2 - 1)(2^{i+1} - 2) - (len2 - 2) i
*/

/*
    Recursive worker for the divide-and-conquer composition.

    res    output coefficients
    poly1  outer polynomial, len1 coefficients
    pow2   table of polynomials; pow2[0] is read with length len2 and
           pow2[i] with length (len2 - 1) * 2^i + 1.  The driver in this
           file fills it by repeated squaring of the inner polynomial.
    len2   length of the inner polynomial
    v      scratch space, see the bound above
    p      modulus

    For len1 > 2 the outer polynomial is split at x^(2^i) into a low part
    of length 2^i and a high part of length len1 - 2^i.  The high part is
    composed first and multiplied by pow2[i]; the low part is then composed
    and added on.
*/
void _fmpz_mod_poly_compose_divconquer_recursive(fmpz *res,
    const fmpz *poly1, slong len1, fmpz **pow2, slong len2, fmpz *v,
    const fmpz_t p)
{
    slong i, half, lenlo, lenhi;
    fmpz *w;

    if (len1 == 1)
    {
        /* Constant outer polynomial: copy its single coefficient. */
        fmpz_set(res, poly1);
        return;
    }

    if (len1 == 2)
    {
        /* Linear outer polynomial: res = poly1[1] * pow2[0] + poly1[0]. */
        _fmpz_mod_poly_scalar_mul_fmpz(res, pow2[0], len2, poly1 + 1, p);
        fmpz_add(res, res, poly1);

        /* Subtract p once if the constant term is now p or more. */
        if (fmpz_cmpabs(res, p) >= 0)
            fmpz_sub(res, res, p);
        return;
    }

    i = FLINT_BIT_COUNT(len1 - 1) - 1;
    half = WORD(1) << i;

    /* Lengths of the composed low and high parts. */
    lenlo = (len2 - 1) * (half - 1) + 1;
    lenhi = (len2 - 1) * (len1 - half - 1) + 1;

    /* Deeper recursion levels use the scratch space after the first
       lenlo entries of v. */
    w = v + lenlo;

    /* v = composition of the high part; res = pow2[i] * v. */
    _fmpz_mod_poly_compose_divconquer_recursive(v,
        poly1 + half, len1 - half, pow2, len2, w, p);
    _fmpz_mod_poly_mul(res, pow2[i], (len2 - 1) * half + 1, v, lenhi, p);

    /* v is overwritten with the composition of the low part, which is
       then added onto the first lenlo entries of res. */
    _fmpz_mod_poly_compose_divconquer_recursive(v, poly1, half,
        pow2, len2, w, p);
    _fmpz_mod_poly_add(res, res, lenlo, v, lenlo, p);
}
/*
    Composes poly1 (length len1) with poly2 (length len2) modulo p and
    writes the coefficients to res.

    If len1 == 1 or len2 == 0 only the constant coefficient of poly1 is
    copied to res.  Otherwise a single vector is allocated that holds the
    table of repeated squares of poly2 followed by the scratch space used
    by the recursive worker; both are released before returning.

    NOTE(review): poly1[0] is read unconditionally in the trivial case, so
    this presumably expects len1 >= 1 -- confirm against callers.
*/
void _fmpz_mod_poly_compose_divconquer(fmpz *res,
    const fmpz *poly1, slong len1,
    const fmpz *poly2, slong len2,
    const fmpz_t p)
{
    slong nlev, lenpows, lenscratch, j;
    fmpz *buf, *scratch, **pow2;

    if (len1 == 1 || len2 == 0)
    {
        fmpz_set(res, poly1);
        return;
    }

    nlev = FLINT_BIT_COUNT(len1 - 1);

    /* Room for the nlev table entries; entry j starts at offset
       len2 * (2^j - 1) + j. */
    lenpows = len2 * ((WORD(1) << nlev) - 1) + nlev;

    /* Scratch space for the recursion. */
    lenscratch = (len2 - 1) * ((WORD(1) << nlev) - 2)
                     - (len2 - 2) * (nlev - 1);

    buf = _fmpz_vec_init(lenpows + lenscratch);
    scratch = buf + lenpows;

    pow2 = flint_malloc(nlev * sizeof(fmpz *));

    /* pow2[0] is a copy of poly2; each later entry is the square of the
       previous one. */
    pow2[0] = buf;
    _fmpz_vec_set(pow2[0], poly2, len2);

    for (j = 1; j < nlev; j++)
    {
        pow2[j] = buf + (len2 * ((WORD(1) << j) - 1) + j);
        _fmpz_mod_poly_sqr(pow2[j],
            pow2[j - 1], (len2 - 1) * (WORD(1) << (j - 1)) + 1, p);
    }

    _fmpz_mod_poly_compose_divconquer_recursive(res, poly1, len1,
        pow2, len2, scratch, p);

    _fmpz_vec_clear(buf, lenpows + lenscratch);
    flint_free(pow2);
}
/*
    Sets res to the composition of poly1 with poly2, using the
    divide-and-conquer routine, with the modulus taken from ctx.

    A zero poly1 gives zero.  If poly1 has length 1 or poly2 has length 0,
    res is set from the constant coefficient of poly1.  Otherwise the
    result is computed with length (len1 - 1)(len2 - 1) + 1 and then
    normalised.

    res may be the same object as poly1 or poly2; in that case the result
    is built in a fresh vector which then replaces the coefficients of res.
*/
void fmpz_mod_poly_compose_divconquer(fmpz_mod_poly_t res,
    const fmpz_mod_poly_t poly1, const fmpz_mod_poly_t poly2,
    const fmpz_mod_ctx_t ctx)
{
    const slong len1 = poly1->length;
    const slong len2 = poly2->length;
    slong lenr;

    if (len1 == 0)
    {
        fmpz_mod_poly_zero(res, ctx);
        return;
    }

    if (len1 == 1 || len2 == 0)
    {
        fmpz_mod_poly_set_fmpz(res, poly1->coeffs, ctx);
        return;
    }

    lenr = (len1 - 1) * (len2 - 1) + 1;

    if ((res == poly1) || (res == poly2))
    {
        /* Output aliases an input: compute into a temporary, then swap
           it in as the coefficient vector of res. */
        fmpz *t = _fmpz_vec_init(lenr);

        _fmpz_mod_poly_compose_divconquer(t, poly1->coeffs, len1,
            poly2->coeffs, len2, fmpz_mod_ctx_modulus(ctx));

        _fmpz_vec_clear(res->coeffs, res->alloc);
        res->coeffs = t;
        res->alloc = lenr;
        res->length = lenr;
    }
    else
    {
        /* No aliasing: write straight into res. */
        fmpz_mod_poly_fit_length(res, lenr, ctx);
        _fmpz_mod_poly_compose_divconquer(res->coeffs, poly1->coeffs, len1,
            poly2->coeffs, len2, fmpz_mod_ctx_modulus(ctx));
    }

    _fmpz_mod_poly_set_length(res, lenr);
    _fmpz_mod_poly_normalise(res);
}