/*
Copyright (C) 2008, 2009, William Hart
Copyright (C) 2010 Fredrik Johansson
Copyright (C) 2021 Daniel Schultz
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See .
*/
#include
#include "flint.h"
#include "ulong_extras.h"
#include "mpn_extras.h"
#include "fmpz.h"
#include "nmod_vec.h"
#define MAC(h, l, a, b) \
do { \
mp_limb_t p1, p0; \
umul_ppmm(p1, p0, a, b); \
add_ssaaaa(h, l, h, l, p1, p0); \
} while (0)
void fmpz_multi_CRT_ui(
fmpz_t b,
mp_srcptr in,
const fmpz_comb_t C,
fmpz_comb_temp_t CT,
int sign)
{
slong i, j, k, l, s;
slong klen = C->crt_klen;
slong * step = C->step;
crt_lut_entry * lu = C->crt_lu;
fmpz * T = CT->T;
fmpz * A = CT->A;
slong * offsets = C->crt_offsets;
const mp_limb_t * md = C->packed_multipliers;
mpz_ptr az;
mp_limb_t * ad;
mp_limb_t hi, lo, t;
for (k = 0, i = 0, l = 0; k < klen; k++)
{
s = step[k];
j = offsets[k];
az = _fmpz_promote(A + k);
if (s < 0)
{
/*
every low level combination in this chunk has 1 prime
and md already has lu[i].i0 pre-multiplied in.
*/
s = -s - 1;
ad = FLINT_MPZ_REALLOC(az, s + 2);
flint_mpn_zero(ad, s + 2);
hi = lo = 0;
for ( ; i < j; md += s, l++, i++)
{
FLINT_ASSERT(lu[i].i0 != 0);
FLINT_ASSERT(lu[i].i1 == 0);
FLINT_ASSERT(lu[i].i2 == 0);
t = mpn_addmul_1(ad, md, s, in[l*1]);
add_ssaaaa(hi, lo, hi, lo, UWORD(0), t);
}
ad[s] = lo;
ad[s + 1] = hi;
}
else
{
ad = FLINT_MPZ_REALLOC(az, s + 2);
flint_mpn_zero(ad, s + 2);
for ( ; i < j; md += s, i++)
{
/* low level combination: 1, 2, or 3 small primes */
FLINT_ASSERT(l + 1 <= C->num_primes);
umul_ppmm(hi, lo, in[l*1], lu[i].i0); l++;
if (lu[i].i2 != 0)
{
FLINT_ASSERT(l + 2 <= C->num_primes);
MAC(hi, lo, in[l*1], lu[i].i1); l++;
MAC(hi, lo, in[l*1], lu[i].i2); l++;
/*
We have lu[i].mod.n = p0*p1*p2, and each lu[i].i{0|1|2} is
strictly less than p1*p2*p3, and the inputs are reduced mod pi.
Therefore, the sum is at most (p0*p1*p2-1)*(p0-1+p1-1+p2-1).
Since p0*p1*p2 fits into a word, the sum fits into two words
and the hi word is less than p0*p1*p2.
*/
}
else if (lu[i].i1 != 0)
{
FLINT_ASSERT(l + 1 <= C->num_primes);
MAC(hi, lo, in[l*1], lu[i].i1); l++;
/* Ditto for two */
}
FLINT_ASSERT(hi < lu[i].mod.n);
NMOD_RED2(t, hi, lo, lu[i].mod);
/* mid level combination: depends on FMPZ_CRT_UI_CUTOFF */
hi = mpn_addmul_1(ad, md, s, t);
add_ssaaaa(ad[s + 1], ad[s], ad[s + 1], ad[s], UWORD(0), hi);
}
}
s += 2;
MPN_NORM(ad, s);
az->_mp_size = s;
_fmpz_demote_val(A + k);
_fmpz_smod(A + k, A + k, C->crt_P->moduli + k, sign, T + 0);
}
FLINT_ASSERT(l == C->num_primes);
/* high level combination */
fmpz_swap(T + 0, b);
_fmpz_multi_CRT_precomp(T, C->crt_P, A, sign);
fmpz_swap(T + 0, b);
}