/* Copyright (C) 2012 Fredrik Johansson This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #include "flint.h" #include "ulong_extras.h" #include "nmod_poly.h" /* This gives some speedup for small lengths. */ static __inline__ void _nmod_poly_rem_2(mp_ptr r, mp_srcptr a, slong al, mp_srcptr b, slong bl, nmod_t mod) { if (al == 2) r[0] = nmod_sub(a[0], nmod_mul(a[1], b[0], mod), mod); else _nmod_poly_rem(r, a, al, b, bl, mod); } void _nmod_poly_evaluate_nmod_vec_fast_precomp(mp_ptr vs, mp_srcptr poly, slong plen, const mp_ptr * tree, slong len, nmod_t mod) { slong height, i, j, pow, left; slong tree_height; slong tlen; mp_ptr t, u, swap, pa, pb, pc; /* avoid worrying about some degenerate cases */ if (len < 2 || plen < 2) { if (len == 1) vs[0] = _nmod_poly_evaluate_nmod(poly, plen, nmod_neg(tree[0][0], mod), mod); else if (len != 0 && plen == 0) _nmod_vec_zero(vs, len); else if (len != 0 && plen == 1) for (i = 0; i < len; i++) vs[i] = poly[0]; return; } t = _nmod_vec_init(len); u = _nmod_vec_init(len); left = len; /* Initial reduction. We allow the polynomial to be larger or smaller than the number of points. */ height = FLINT_BIT_COUNT(plen - 1) - 1; tree_height = FLINT_CLOG2(len); while (height >= tree_height) height--; pow = WORD(1) << height; for (i = j = 0; i < len; i += pow, j += (pow + 1)) { tlen = ((i + pow) <= len) ? pow : len % pow; _nmod_poly_rem(t + i, poly, plen, tree[height] + j, tlen + 1, mod); } for (i = height - 1; i >= 0; i--) { pow = WORD(1) << i; left = len; pa = tree[i]; pb = t; pc = u; while (left >= 2 * pow) { _nmod_poly_rem_2(pc, pb, 2 * pow, pa, pow + 1, mod); _nmod_poly_rem_2(pc + pow, pb, 2 * pow, pa + pow + 1, pow + 1, mod); pa += 2 * pow + 2; pb += 2 * pow; pc += 2 * pow; left -= 2 * pow; } if (left > pow) { _nmod_poly_rem(pc, pb, left, pa, pow + 1, mod); _nmod_poly_rem(pc + pow, pb, left, pa + pow + 1, left - pow + 1, mod); } else if (left > 0) _nmod_vec_set(pc, pb, left); swap = t; t = u; u = swap; } _nmod_vec_set(vs, t, len); _nmod_vec_clear(t); _nmod_vec_clear(u); } void _nmod_poly_evaluate_nmod_vec_fast(mp_ptr ys, mp_srcptr poly, slong plen, mp_srcptr xs, slong n, nmod_t mod) { mp_ptr * tree; tree = _nmod_poly_tree_alloc(n); _nmod_poly_tree_build(tree, xs, n, mod); _nmod_poly_evaluate_nmod_vec_fast_precomp(ys, poly, plen, tree, n, mod); _nmod_poly_tree_free(tree, n); } void nmod_poly_evaluate_nmod_vec_fast(mp_ptr ys, const nmod_poly_t poly, mp_srcptr xs, slong n) { _nmod_poly_evaluate_nmod_vec_fast(ys, poly->coeffs, poly->length, xs, n, poly->mod); }