/* Copyright (C) 2017-2020 Daniel Schultz This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include "nmod_mpoly.h" static int _nmod_mpoly_divrem_monagan_pearce1_binomial( nmod_mpoly_t Q, nmod_mpoly_t R, const mp_limb_t * Acoeffs, const ulong * Aexps, slong Alen, const mp_limb_t * Bcoeffs, const ulong * Bexps, flint_bitcnt_t bits, ulong maskhi, nmod_t mod) { mp_limb_t * Qcoeffs = Q->coeffs; mp_limb_t * Rcoeffs = R->coeffs; ulong * Qexps = Q->exps; ulong * Rexps = R->exps; ulong lexp, mask = mpoly_overflow_mask_sp(bits); mp_limb_t lcoeff; mp_limb_t lc_inv = nmod_inv(Bcoeffs[0], mod); mp_limb_t mBcoeff1 = mod.n - Bcoeffs[1]; slong Qlen = 0; slong Rlen = 0; slong Aidx = 0; slong Qidx = 0; while (1) { FLINT_ASSERT(0 <= Aidx && Aidx <= Alen); FLINT_ASSERT(0 <= Qidx && Qidx <= Qlen); if (Aidx < Alen) { lexp = Aexps[Aidx]; if (Qidx < Qlen) { ulong thisexp = Bexps[1] + Qexps[Qidx]; int cmp = mpoly_monomial_cmp1(lexp, thisexp, maskhi); if (cmp < 0) { lexp = thisexp; lcoeff = nmod_mul(mBcoeff1, Qcoeffs[Qidx], mod); Qidx++; } else if (cmp == 0) { lcoeff = Acoeffs[Aidx]; NMOD_ADDMUL(lcoeff, mBcoeff1, Qcoeffs[Qidx], mod); Aidx++; Qidx++; } else { lcoeff = Acoeffs[Aidx]; Aidx++; } } else { lcoeff = Acoeffs[Aidx]; Aidx++; } } else if (Qidx < Qlen) { lexp = Bexps[1] + Qexps[Qidx]; lcoeff = nmod_mul(mBcoeff1, Qcoeffs[Qidx], mod); Qidx++; } else { break; } if (mpoly_monomial_overflows1(lexp, mask)) goto exp_overflow; if (lcoeff == 0) continue; _nmod_mpoly_fit_length(&Qcoeffs, &Q->coeffs_alloc, &Qexps, &Q->exps_alloc, 1, Qlen + 1); if (!mpoly_monomial_divides1(Qexps + Qlen, lexp, Bexps[0], mask)) { _nmod_mpoly_fit_length(&Rcoeffs, &R->coeffs_alloc, &Rexps, &R->exps_alloc, 1, Rlen + 1); Rcoeffs[Rlen] = lcoeff; Rexps[Rlen] = lexp; Rlen++; continue; } if (lc_inv == 1) Qcoeffs[Qlen] = lcoeff; else Qcoeffs[Qlen] = nmod_mul(lcoeff, lc_inv, mod); Qlen++; } Q->coeffs = Qcoeffs; Q->exps = Qexps; Q->length = Qlen; R->coeffs = Rcoeffs; R->exps = Rexps; R->length = Rlen; return 1; exp_overflow: Q->coeffs = Qcoeffs; Q->exps = Qexps; Q->length = 0; R->coeffs = Rcoeffs; R->exps = Rexps; R->length = 0; return 0; } static int _nmod_mpoly_divrem_monagan_pearce1( nmod_mpoly_t Q, nmod_mpoly_t R, const mp_limb_t * Acoeffs, const ulong * Aexps, slong Alen, const mp_limb_t * Bcoeffs, const ulong * Bexps, slong Blen, flint_bitcnt_t bits, ulong maskhi, nmod_t fctx) { slong i, j, Qlen, Rlen, s; slong next_loc, heap_len = 2; mpoly_heap1_s * heap; mpoly_heap_t * chain; slong * store, * store_base; mpoly_heap_t * x; mp_limb_t * Qcoeffs = Q->coeffs; mp_limb_t * Rcoeffs = R->coeffs; ulong * Qexps = Q->exps; ulong * Rexps = R->exps; slong * hind; ulong mask, exp; int lt_divides; mp_limb_t lc_minus_inv, acc0, acc1, acc2, pp1, pp0; TMP_INIT; TMP_START; /* alloc array of heap nodes which can be chained together */ next_loc = Blen + 4; /* something bigger than heap can ever be */ heap = (mpoly_heap1_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap1_s)); chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t)); store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong)); /* space for flagged heap indicies */ hind = (slong *) TMP_ALLOC(Blen*sizeof(slong)); for (i = 0; i < Blen; i++) hind[i] = 1; mask = mpoly_overflow_mask_sp(bits); /* quotient and remainder poly indices start at -1 */ Qlen = WORD(0); Rlen = WORD(0); /* s is the number of terms * (latest quotient) we should put into heap */ s = Blen; /* insert (-1, 0, Aexps[0]) into heap */ x = chain + 0; x->i = -WORD(1); x->j = 0; x->next = NULL; HEAP_ASSIGN(heap[1], Aexps[0], x); /* precompute leading cofficient info */ lc_minus_inv = fctx.n - nmod_inv(Bcoeffs[0], fctx); while (heap_len > 1) { exp = heap[1].exp; if (mpoly_monomial_overflows1(exp, mask)) goto exp_overflow; _nmod_mpoly_fit_length(&Qcoeffs, &Q->coeffs_alloc, &Qexps, &Q->exps_alloc, 1, Qlen + 1); lt_divides = mpoly_monomial_divides1(Qexps + Qlen, exp, Bexps[0], mask); acc0 = acc1 = acc2 = 0; do { x = _mpoly_heap_pop1(heap, &heap_len, maskhi); do { *store++ = x->i; *store++ = x->j; if (x->i == -WORD(1)) { add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, UWORD(0), UWORD(0), fctx.n - Acoeffs[x->j]); } else { hind[x->i] |= WORD(1); umul_ppmm(pp1, pp0, Bcoeffs[x->i], Qcoeffs[x->j]); add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, UWORD(0), pp1, pp0); } } while ((x = x->next) != NULL); } while (heap_len > 1 && heap[1].exp == exp); NMOD_RED3(acc0, acc2, acc1, acc0, fctx); /* process nodes taken from the heap */ while (store > store_base) { j = *--store; i = *--store; if (i == -WORD(1)) { /* take next dividend term */ if (j + 1 < Alen) { x = chain + 0; x->i = i; x->j = j + 1; x->next = NULL; _mpoly_heap_insert1(heap, Aexps[x->j], x, &next_loc, &heap_len, maskhi); } } else { /* should we go right? */ if ( (i + 1 < Blen) && (hind[i + 1] == 2*j + 1) ) { x = chain + i + 1; x->i = i + 1; x->j = j; x->next = NULL; hind[x->i] = 2*(x->j + 1) + 0; _mpoly_heap_insert1(heap, Bexps[x->i] + Qexps[x->j], x, &next_loc, &heap_len, maskhi); } /* should we go up? */ if (j + 1 == Qlen) { s++; } else if ( ((hind[i] & 1) == 1) && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1)) ) { x = chain + i; x->i = i; x->j = j + 1; x->next = NULL; hind[x->i] = 2*(x->j + 1) + 0; _mpoly_heap_insert1(heap, Bexps[x->i] + Qexps[x->j], x, &next_loc, &heap_len, maskhi); } } } /* try to divide accumulated term by leading term */ if (acc0 == 0) { continue; } if (!lt_divides) { _nmod_mpoly_fit_length(&Rcoeffs, &R->coeffs_alloc, &Rexps, &R->exps_alloc, 1, Rlen + 1); Rcoeffs[Rlen] = fctx.n - acc0; Rexps[Rlen] = exp; Rlen++; continue; } Qcoeffs[Qlen] = nmod_mul(acc0, lc_minus_inv, fctx); /* put newly generated quotient term back into the heap if neccesary */ if (s > 1) { i = 1; x = chain + i; x->i = i; x->j = Qlen; x->next = NULL; hind[x->i] = 2*(x->j + 1) + 0; _mpoly_heap_insert1(heap, Bexps[x->i] + Qexps[x->j], x, &next_loc, &heap_len, maskhi); } s = 1; Qlen++; } Q->coeffs = Qcoeffs; Q->exps = Qexps; Q->length = Qlen; R->coeffs = Rcoeffs; R->exps = Rexps; R->length = Rlen; TMP_END; return 1; exp_overflow: Q->coeffs = Qcoeffs; Q->exps = Qexps; Q->length = 0; R->coeffs = Rcoeffs; R->exps = Rexps; R->length = 0; TMP_END; return 0; } static int _nmod_mpoly_divrem_monagan_pearce( nmod_mpoly_t Q, nmod_mpoly_t R, const mp_limb_t * Acoeffs, const ulong * Aexps, slong Alen, const mp_limb_t * Bcoeffs, const ulong * Bexps, slong Blen, slong bits, slong N, const ulong * cmpmask, nmod_t fctx) { slong i, j, Qlen, Rlen, s; slong next_loc; slong heap_len = 2; /* heap zero index unused */ mpoly_heap_s * heap; mpoly_heap_t * chain; slong * store, * store_base; mpoly_heap_t * x; mp_limb_t * Qcoeffs = Q->coeffs; mp_limb_t * Rcoeffs = R->coeffs; ulong * Qexps = Q->exps; ulong * Rexps = R->exps; ulong * exp, * exps; ulong ** exp_list; slong exp_next; ulong mask; slong * hind; int lt_divides; mp_limb_t lc_minus_inv, acc0, acc1, acc2, pp1, pp0; TMP_INIT; if (N == 1) { if (Blen == 2) return _nmod_mpoly_divrem_monagan_pearce1_binomial(Q, R, Acoeffs, Aexps, Alen, Bcoeffs, Bexps, bits, cmpmask[0], fctx); else return _nmod_mpoly_divrem_monagan_pearce1(Q, R, Acoeffs, Aexps, Alen, Bcoeffs, Bexps, Blen, bits, cmpmask[0], fctx); } TMP_START; /* alloc array of heap nodes which can be chained together */ next_loc = Blen + 4; /* something bigger than heap can ever be */ heap = (mpoly_heap_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap_s)); chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t)); store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong)); /* array of exponent vectors, each of "N" words */ exps = (ulong *) TMP_ALLOC(Blen*N*sizeof(ulong)); /* list of pointers to available exponent vectors */ exp_list = (ulong **) TMP_ALLOC(Blen*sizeof(ulong *)); /* space to save copy of current exponent vector */ exp = (ulong *) TMP_ALLOC(N*sizeof(ulong)); /* set up list of available exponent vectors */ exp_next = 0; for (i = 0; i < Blen; i++) exp_list[i] = exps + i*N; /* space for flagged heap indicies */ hind = (slong *) TMP_ALLOC(Blen*sizeof(slong)); for (i = 0; i < Blen; i++) hind[i] = 1; mask = bits <= FLINT_BITS ? mpoly_overflow_mask_sp(bits) : 0; Qlen = WORD(0); Rlen = WORD(0); /* s is the number of terms * (latest quotient) we should put into heap */ s = Blen; /* insert (-1, 0, Aexps[0]) into heap */ x = chain + 0; x->i = -WORD(1); x->j = 0; x->next = NULL; heap[1].next = x; heap[1].exp = exp_list[exp_next++]; mpoly_monomial_set(heap[1].exp, Aexps, N); /* precompute leading cofficient info */ lc_minus_inv = fctx.n - nmod_inv(Bcoeffs[0], fctx); while (heap_len > 1) { _nmod_mpoly_fit_length(&Qcoeffs, &Q->coeffs_alloc, &Qexps, &Q->exps_alloc, N, Qlen + 1); mpoly_monomial_set(exp, heap[1].exp, N); if (bits <= FLINT_BITS) { if (mpoly_monomial_overflows(exp, N, mask)) goto exp_overflow2; lt_divides = mpoly_monomial_divides(Qexps + Qlen*N, exp, Bexps, N, mask); } else { if (mpoly_monomial_overflows_mp(exp, N, bits)) goto exp_overflow2; lt_divides = mpoly_monomial_divides_mp(Qexps + Qlen*N, exp, Bexps, N, bits); } acc0 = acc1 = acc2 = 0; do { exp_list[--exp_next] = heap[1].exp; x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask); do { *store++ = x->i; *store++ = x->j; if (x->i == -WORD(1)) { add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, UWORD(0), UWORD(0), fctx.n - Acoeffs[x->j]); } else { hind[x->i] |= WORD(1); umul_ppmm(pp1, pp0, Bcoeffs[x->i], Qcoeffs[x->j]); add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, UWORD(0), pp1, pp0); } } while ((x = x->next) != NULL); } while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N)); NMOD_RED3(acc0, acc2, acc1, acc0, fctx); /* process nodes taken from the heap */ while (store > store_base) { j = *--store; i = *--store; if (i == -WORD(1)) { /* take next dividend term */ if (j + 1 < Alen) { x = chain + 0; x->i = i; x->j = j + 1; x->next = NULL; mpoly_monomial_set(exp_list[exp_next], Aexps + x->j*N, N); exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x, &next_loc, &heap_len, N, cmpmask); } } else { /* should we go right? */ if ( (i + 1 < Blen) && (hind[i + 1] == 2*j + 1) ) { x = chain + i + 1; x->i = i + 1; x->j = j; x->next = NULL; hind[x->i] = 2*(x->j + 1) + 0; mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N, Qexps + x->j*N, N); exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x, &next_loc, &heap_len, N, cmpmask); } /* should we go up? */ if (j + 1 == Qlen) { s++; } else if ( ((hind[i] & 1) == 1) && ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1)) ) { x = chain + i; x->i = i; x->j = j + 1; x->next = NULL; hind[x->i] = 2*(x->j + 1) + 0; mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N, Qexps + x->j*N, N); exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x, &next_loc, &heap_len, N, cmpmask); } } } /* try to divide accumulated term by leading term */ if (acc0 == 0) continue; if (!lt_divides) { _nmod_mpoly_fit_length(&Rcoeffs, &R->coeffs_alloc, &Rexps, &R->exps_alloc, N, Rlen + 1); Rcoeffs[Rlen] = fctx.n - acc0; mpoly_monomial_set(Rexps + Rlen*N, exp, N); Rlen++; continue; } Qcoeffs[Qlen] = nmod_mul(acc0, lc_minus_inv, fctx); /* put newly generated quotient term back into the heap if neccesary */ if (s > 1) { i = 1; x = chain + i; x->i = i; x->j = Qlen; x->next = NULL; hind[x->i] = 2*(x->j + 1) + 0; mpoly_monomial_add_mp(exp_list[exp_next], Bexps + x->i*N, Qexps + x->j*N, N); exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x, &next_loc, &heap_len, N, cmpmask); } s = 1; Qlen++; } Q->coeffs = Qcoeffs; Q->exps = Qexps; Q->length = Qlen; R->coeffs = Rcoeffs; R->exps = Rexps; R->length = Rlen; TMP_END; return 1; exp_overflow2: Q->coeffs = Qcoeffs; Q->exps = Qexps; Q->length = 0; R->coeffs = Rcoeffs; R->exps = Rexps; R->length = 0; TMP_END; return 0; } void nmod_mpoly_divrem_monagan_pearce( nmod_mpoly_t Q, nmod_mpoly_t R, const nmod_mpoly_t A, const nmod_mpoly_t B, const nmod_mpoly_ctx_t ctx) { slong N; flint_bitcnt_t QRbits; ulong * Aexps = A->exps, * Bexps = B->exps; ulong * cmpmask; int freeAexps = 0, freeBexps = 0; nmod_mpoly_t TQ, TR; nmod_mpoly_struct * q, * r; if (nmod_mpoly_is_zero(B, ctx)) { if (nmod_mpoly_ctx_modulus(ctx) == 1) { nmod_mpoly_zero(Q, ctx); nmod_mpoly_zero(R, ctx); return; } else { flint_throw(FLINT_DIVZERO, "nmod_mpoly_divrem_monagan_pearce: divide by zero"); } } if (nmod_mpoly_is_zero(A, ctx)) { nmod_mpoly_zero(Q, ctx); nmod_mpoly_zero(R, ctx); return; } nmod_mpoly_init(TQ, ctx); nmod_mpoly_init(TR, ctx); QRbits = FLINT_MAX(A->bits, B->bits); QRbits = mpoly_fix_bits(QRbits, ctx->minfo); N = mpoly_words_per_exp(QRbits, ctx->minfo); cmpmask = (ulong *) flint_malloc(N*sizeof(ulong)); mpoly_get_cmpmask(cmpmask, N, QRbits, ctx->minfo); /* ensure input exponents packed to same size as output exponents */ if (QRbits != A->bits) { freeAexps = 1; Aexps = (ulong *) flint_malloc(N*A->length*sizeof(ulong)); mpoly_repack_monomials(Aexps, QRbits, A->exps, A->bits, A->length, ctx->minfo); } if (QRbits != B->bits) { freeBexps = 1; Bexps = (ulong *) flint_malloc(N*B->length*sizeof(ulong)); mpoly_repack_monomials(Bexps, QRbits, B->exps, B->bits, B->length, ctx->minfo); } /* check divisor leading monomial is at most that of the dividend */ if (mpoly_monomial_lt(Aexps, Bexps, N, cmpmask)) { nmod_mpoly_set(R, A, ctx); nmod_mpoly_zero(Q, ctx); goto cleanup; } /* take care of aliasing */ if (Q == A || Q == B) q = TQ; else q = Q; if (R == A || R == B) r = TR; else r = R; /* do division with remainder */ while (1) { nmod_mpoly_fit_length_reset_bits(q, A->length/B->length + 1, QRbits, ctx); nmod_mpoly_fit_length_reset_bits(r, B->length, QRbits, ctx); if (_nmod_mpoly_divrem_monagan_pearce(q, r, A->coeffs, Aexps, A->length, B->coeffs, Bexps, B->length, QRbits, N, cmpmask, ctx->mod)) { break; } QRbits = mpoly_fix_bits(QRbits + 1, ctx->minfo); N = mpoly_words_per_exp(QRbits, ctx->minfo); cmpmask = (ulong *) flint_realloc(cmpmask, N*sizeof(ulong)); mpoly_get_cmpmask(cmpmask, N, QRbits, ctx->minfo); if (freeAexps) flint_free(Aexps); Aexps = (ulong *) flint_malloc(N*A->length*sizeof(ulong)); mpoly_repack_monomials(Aexps, QRbits, A->exps, A->bits, A->length, ctx->minfo); freeAexps = 1; if (freeBexps) flint_free(Bexps); Bexps = (ulong *) flint_malloc(N*B->length*sizeof(ulong)); mpoly_repack_monomials(Bexps, QRbits, B->exps, B->bits, B->length, ctx->minfo); freeBexps = 1; } /* deal with aliasing */ if (Q == A || Q == B) nmod_mpoly_swap(Q, TQ, ctx); if (R == A || R == B) nmod_mpoly_swap(R, TR, ctx); cleanup: nmod_mpoly_clear(TQ, ctx); nmod_mpoly_clear(TR, ctx); if (freeAexps) flint_free(Aexps); if (freeBexps) flint_free(Bexps); flint_free(cmpmask); }