/*
Copyright (C) 2007, 2008 David Harvey (zn_poly)
Copyright (C) 2013 William Hart
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See .
*/
#include
#include
#include "flint.h"
#include "nmod_vec.h"
#include "nmod_poly.h"
void
_nmod_poly_KS2_reduce(mp_ptr res, slong s, mp_srcptr op, slong n, ulong w,
nmod_t mod)
{
if (w == 1)
{
for (; n; n--, res += s, op++)
NMOD_RED(*res, *op, mod);
}
else if (w == 2)
{
for (; n; n--, res += s, op += 2)
NMOD2_RED2(*res, op[1], op[0], mod);
}
else /* w == 3 */
{
for (; n; n--, res += s, op += 3)
NMOD_RED3(*res, op[2], op[1], op[0], mod);
}
}
/*
Same as _nmod_poly_KS2_recover_reduce(), but requires 0 < 2 * b <= FLINT_BITS
*/
void
_nmod_poly_KS2_recover_reduce1(mp_ptr res, slong s, mp_srcptr op1,
mp_srcptr op2, slong n, ulong b,
nmod_t mod)
{
ulong mask = (UWORD(1) << b) - 1;
/* (x0, x1) and (y0, y1) are two-digit windows into X and Y. */
ulong x1, x0 = *op1++;
ulong y0, y1, borrow;
op2 += n;
y1 = *op2--;
borrow = 0;
/* plain reduction version */
for (; n; n--)
{
y0 = *op2--;
x1 = *op1++;
if (y0 < x0)
y1--;
NMOD_RED(*res, x0 + (y1 << b), mod);
res += s;
y1 += borrow;
borrow = (x1 < y1);
x1 -= y1;
y1 = (y0 - x0) & mask;
x0 = x1 & mask;
}
}
/*
Same as _nmod_poly_KS2_recover_reduce(), but requires
FLINT_BITS < 2 * b < 2*FLINT_BITS
*/
void
_nmod_poly_KS2_recover_reduce2(mp_ptr res, slong s, mp_srcptr op1,
mp_srcptr op2, slong n, ulong b,
nmod_t mod)
{
/*
The main loop is the same as in _nmod_poly_KS2_recover_reduce1(), but the
modular reduction step needs to handle two input words.
*/
ulong mask = (UWORD(1) << b) - 1;
ulong x1, x0 = *op1++;
ulong y0, y1, borrow, b2;
op2 += n;
y1 = *op2--;
borrow = 0;
b2 = FLINT_BITS - b;
/* plain reduction version */
for (; n; n--)
{
y0 = *op2--;
x1 = *op1++;
if (y0 < x0)
y1--;
NMOD2_RED2(*res, y1 >> b2, x0 + (y1 << b), mod);
res += s;
y1 += borrow;
borrow = (x1 < y1);
x1 -= y1;
y1 = (y0 - x0) & mask;
x0 = x1 & mask;
}
}
/*
Same as _nmod_poly_KS2_recover_reduce(), but requires b == FLINT_BITS
*/
void
_nmod_poly_KS2_recover_reduce2b(mp_ptr res, slong s, mp_srcptr op1,
mp_srcptr op2, slong n, ulong b,
nmod_t mod)
{
/*
Basically the same code as _nmod_poly_KS2_recover_reduce2(), specialised
for b == FLINT_BITS.
*/
ulong x1, x0 = *op1++;
ulong y0, y1, borrow;
op2 += n;
y1 = *op2--;
borrow = 0;
/* plain reduction version */
for (; n; n--)
{
y0 = *op2--;
x1 = *op1++;
if (y0 < x0)
y1--;
NMOD2_RED2(*res, y1, x0, mod);
res += s;
y1 += borrow;
borrow = (x1 < y1);
x1 -= y1;
y1 = y0 - x0;
x0 = x1;
}
}
/*
Same as _nmod_poly_KS2_recover_reduce(), but requires
2 * FLINT_BITS < 2 * b <= 3 * FLINT_BITS.
*/
void
_nmod_poly_KS2_recover_reduce3(mp_ptr res, slong s, mp_srcptr op1,
mp_srcptr op2, slong n, ulong b,
nmod_t mod)
{
/*
The main loop is the same as in zn_array_recover_reduce1(), but needs
to operate on double-word quantities everywhere, i.e. we simulate
double-word registers. The suffixes L and H stand for low and high words
of each.
*/
ulong maskH = (UWORD(1) << (b - FLINT_BITS)) - 1;
ulong x1L, x0L = *op1++;
ulong x1H, x0H = *op1++;
ulong y0H, y1H, y0L, y1L;
ulong borrow, b1, b2;
op2 += 2 * n + 1;
y1H = *op2--;
y1L = *op2--;
borrow = 0;
b1 = b - FLINT_BITS;
b2 = 2 * FLINT_BITS - b;
/* plain reduction version */
for (; n; n--)
{
y0H = *op2--;
y0L = *op2--;
x1L = *op1++;
x1H = *op1++;
if ((y0H < x0H) || (y0H == x0H && y0L < x0L))
y1H -= (y1L-- == 0);
NMOD_RED3(*res, (y1H << b1) + (y1L >> b2),
(y1L << b1) + x0H, x0L, mod);
res += s;
if (borrow)
y1H += (++y1L == 0);
borrow = ((x1H < y1H) || (x1H == y1H && x1L < y1L));
sub_ddmmss(x1H, x1L, x1H, x1L, y1H, y1L);
sub_ddmmss(y1H, y1L, y0H, y0L, x0H, x0L);
y1H &= maskH;
x0L = x1L;
x0H = x1H & maskH;
}
}
/*
Dispatches to one of the above routines depending on b.
*/
void
_nmod_poly_KS2_recover_reduce(mp_ptr res, slong s, mp_srcptr op1,
mp_srcptr op2, slong n, ulong b,
nmod_t mod)
{
if (2 * b <= FLINT_BITS)
_nmod_poly_KS2_recover_reduce1(res, s, op1, op2, n, b, mod);
else if (b < FLINT_BITS)
_nmod_poly_KS2_recover_reduce2(res, s, op1, op2, n, b, mod);
else if (b == FLINT_BITS)
_nmod_poly_KS2_recover_reduce2b(res, s, op1, op2, n, b, mod);
else
_nmod_poly_KS2_recover_reduce3(res, s, op1, op2, n, b, mod);
}