/* Copyright (C) 2011 William Hart Copyright (C) 2011 Sebastian Pancratz Copyright (C) 2014 William Hart This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #include #include "flint.h" #include "fmpz_vec.h" #include "fmpz_mod_poly.h" #include "mpn_extras.h" /* We define a whole bunch of macros here which essentially provide the nmod_poly functionality as far as the setting of coefficient data and lengths is concerned, but which do not do any separate memory allocation. None of these macros support aliasing. */ #define __set(B, lenB, A, lenA) \ do { \ _fmpz_vec_set((B), (A), (lenA)); \ (lenB) = (lenA); \ } while (0) #define __add(C, lenC, A, lenA, B, lenB) \ do { \ _fmpz_mod_poly_add((C), (A), (lenA), (B), (lenB), mod); \ (lenC) = FLINT_MAX((lenA), (lenB)); \ FMPZ_VEC_NORM((C), (lenC)); \ } while (0) #define __sub(C, lenC, A, lenA, B, lenB) \ do { \ _fmpz_mod_poly_sub((C), (A), (lenA), (B), (lenB), mod); \ (lenC) = FLINT_MAX((lenA), (lenB)); \ FMPZ_VEC_NORM((C), (lenC)); \ } while (0) #define __mul(C, lenC, A, lenA, B, lenB) \ do { \ if ((lenA) != 0 && (lenB) != 0) \ { \ if ((lenA) >= (lenB)) \ _fmpz_mod_poly_mul((C), (A), (lenA), (B), (lenB), mod); \ else \ _fmpz_mod_poly_mul((C), (B), (lenB), (A), (lenA), mod); \ (lenC) = (lenA) + (lenB) - 1; \ } \ else \ { \ (lenC) = 0; \ } \ } while (0) #define __divrem(Q, lenQ, R, lenR, A, lenA, B, lenB) \ do { \ if ((lenA) >= (lenB)) \ { \ fmpz_invmod(invB, B + lenB - 1, mod); \ _fmpz_mod_poly_divrem((Q), (R), (A), (lenA), (B), (lenB), invB, mod); \ (lenQ) = (lenA) - (lenB) + 1; \ (lenR) = (lenB) - 1; \ FMPZ_VEC_NORM((R), (lenR)); \ } \ else \ { \ _fmpz_vec_set((R), (A), (lenA)); \ (lenQ) = 0; \ (lenR) = (lenA); \ } \ } while (0) #define __div(Q, lenQ, A, lenA, B, lenB) \ do { \ if ((lenA) >= (lenB)) \ { \ fmpz * __t = _fmpz_vec_init(lenB - 1); \ fmpz_invmod(invB, B + lenB - 1, mod); \ _fmpz_mod_poly_divrem((Q), __t, (A), (lenA), (B), (lenB), invB, mod); \ _fmpz_vec_clear(__t, lenB - 1); \ (lenQ) = (lenA) - (lenB) + 1; \ } \ else \ { \ (lenQ) = 0; \ } \ } while (0) slong _fmpz_mod_poly_xgcd_hgcd(fmpz *G, fmpz *S, fmpz *T, const fmpz *A, slong lenA, const fmpz *B, slong lenB, const fmpz_t mod) { slong lenG, lenS, lenT; if (lenB == 1) { fmpz_set(G + 0, B + 0); fmpz_set_ui(T, 1); lenG = 1; lenS = 0; lenT = 1; } else { slong lenq, lenr, len1 = lenA + lenB; fmpz *q = _fmpz_vec_init(len1); fmpz *r = q + lenA; fmpz_t invB; fmpz_init(invB); __divrem(q, lenq, r, lenr, A, lenA, B, lenB); if (lenr == 0) { __set(G, lenG, B, lenB); fmpz_set_ui(T + 0, 1); lenS = 0; lenT = 1; } else { fmpz *h, *j, *v, *w, *R[4], *X; slong lenh, lenj, lenv, lenw, lenR[4], len2; int sgnR; lenh = lenj = lenB; lenv = lenw = lenA + lenB - 2; lenR[0] = lenR[1] = lenR[2] = lenR[3] = (lenB + 1) / 2; len2 = 2 * lenh + 2 * lenv + 4 * lenR[0]; X = _fmpz_vec_init(len2); h = X; j = h + lenh; v = j + lenj; w = v + lenv; R[0] = w + lenw; R[1] = R[0] + lenR[0]; R[2] = R[1] + lenR[1]; R[3] = R[2] + lenR[2]; sgnR = _fmpz_mod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, B, lenB, r, lenr, mod); if (sgnR > 0) { _fmpz_mod_poly_neg(S, R[1], lenR[1], mod); _fmpz_vec_set(T, R[0], lenR[0]); } else { _fmpz_vec_set(S, R[1], lenR[1]); _fmpz_mod_poly_neg(T, R[0], lenR[0], mod); } lenS = lenR[1]; lenT = lenR[0]; while (lenj != 0) { __divrem(q, lenq, r, lenr, h, lenh, j, lenj); __mul(v, lenv, q, lenq, T, lenT); { slong l; _fmpz_vec_swap(S, T, FLINT_MAX(lenS, lenT)); l = lenS; lenS = lenT; lenT = l; } if (lenr != 0) /* prevent overflow of T on last iteration */ __sub(T, lenT, T, lenT, v, lenv); else /* lenr == 0 */ { __set(G, lenG, j, lenj); goto cofactor; } if (lenj < FMPZ_MOD_POLY_GCD_CUTOFF) { fmpz *u0 = R[0], *u1 = R[1]; slong lenu0 = lenr - 1, lenu1 = lenj - 1; fmpz_invmod(invB, r + lenr - 1, mod); lenG = _fmpz_mod_poly_xgcd_euclidean(G, u0, u1, j, lenj, r, lenr, invB, mod); FMPZ_VEC_NORM(u0, lenu0); FMPZ_VEC_NORM(u1, lenu1); __mul(v, lenv, S, lenS, u0, lenu0); __mul(w, lenw, T, lenT, u1, lenu1); __add(S, lenS, v, lenv, w, lenw); goto cofactor; } sgnR = _fmpz_mod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, j,lenj, r, lenr, mod); __mul(v, lenv, R[1], lenR[1], T, lenT); __mul(w, lenw, R[2], lenR[2], S, lenS); __mul(q, lenq, S, lenS, R[3], lenR[3]); if (sgnR > 0) __sub(S, lenS, q, lenq, v, lenv); else __sub(S, lenS, v, lenv, q, lenq); __mul(q, lenq, T, lenT, R[0], lenR[0]); if (sgnR > WORD(0)) __sub(T, lenT, q, lenq, w, lenw); else __sub(T, lenT, w, lenw, q, lenq); } __set(G, lenG, h, lenh); cofactor: __mul(v, lenv, S, lenS, A, lenA); __sub(w, lenw, G, lenG, v, lenv); __div(T, lenT, w, lenw, B, lenB); _fmpz_vec_clear(X, len2); } _fmpz_vec_clear(q, len1); fmpz_clear(invB); } _fmpz_vec_zero(S + lenS, lenB - 1 - lenS); _fmpz_vec_zero(T + lenT, lenA - 1 - lenT); return lenG; } void fmpz_mod_poly_xgcd_hgcd(fmpz_mod_poly_t G, fmpz_mod_poly_t S, fmpz_mod_poly_t T, const fmpz_mod_poly_t A, const fmpz_mod_poly_t B, const fmpz_mod_ctx_t ctx) { if (A->length < B->length) { fmpz_mod_poly_xgcd_hgcd(G, T, S, B, A, ctx); } else /* lenA >= lenB >= 0 */ { const fmpz * p = fmpz_mod_ctx_modulus(ctx); const slong lenA = A->length, lenB = B->length; slong lenS, lenT; fmpz_t inv; fmpz_init(inv); if (lenA == 0) /* lenA = lenB = 0 */ { fmpz_mod_poly_zero(G, ctx); fmpz_mod_poly_zero(S, ctx); fmpz_mod_poly_zero(T, ctx); } else if (lenB == 0) /* lenA > lenB = 0 */ { fmpz_invmod(inv, A->coeffs + lenA - 1, p); fmpz_mod_poly_scalar_mul_fmpz(G, A, inv, ctx); fmpz_mod_poly_zero(T, ctx); fmpz_mod_poly_set_coeff_fmpz(S, 0, inv, ctx); _fmpz_mod_poly_set_length(S, 1); } else if (lenB == 1) /* lenA >= lenB = 1 */ { fmpz_mod_poly_fit_length(T, 1, ctx); _fmpz_mod_poly_set_length(T, 1); fmpz_invmod(inv, B->coeffs + 0, p); fmpz_set(T->coeffs + 0, inv); fmpz_mod_poly_set_coeff_ui(G, 0, 1, ctx); _fmpz_mod_poly_set_length(G, 1); fmpz_mod_poly_zero(S, ctx); } else /* lenA >= lenB >= 2 */ { fmpz *g, *s, *t; slong lenG; if (G == A || G == B) { g = _fmpz_vec_init(FLINT_MIN(lenA, lenB)); } else { fmpz_mod_poly_fit_length(G, FLINT_MIN(lenA, lenB), ctx); g = G->coeffs; } if (S == A || S == B) { s = _fmpz_vec_init(FLINT_MAX(lenB - 1, 2)); } else { fmpz_mod_poly_fit_length(S, FLINT_MAX(lenB - 1, 2), ctx); s = S->coeffs; } if (T == A || T == B) { t = _fmpz_vec_init(FLINT_MAX(lenA - 1, 2)); } else { fmpz_mod_poly_fit_length(T, FLINT_MAX(lenA - 1, 2), ctx); t = T->coeffs; } if (lenA >= lenB) lenG = _fmpz_mod_poly_xgcd_hgcd(g, s, t, A->coeffs, lenA, B->coeffs, lenB, p); else lenG = _fmpz_mod_poly_xgcd_hgcd(g, t, s, B->coeffs, lenB, A->coeffs, lenA, p); if (G == A || G == B) { _fmpz_vec_clear(G->coeffs, FLINT_MIN(lenA, lenB)); G->coeffs = g; G->alloc = FLINT_MIN(lenA, lenB); } if (S == A || S == B) { _fmpz_vec_clear(S->coeffs, FLINT_MAX(lenB - 1, 2)); S->coeffs = s; S->alloc = FLINT_MAX(lenB - 1, 2); } if (T == A || T == B) { _fmpz_vec_clear(T->coeffs, FLINT_MAX(lenA - 1, 2)); T->coeffs = t; T->alloc = FLINT_MAX(lenA - 1, 2); } _fmpz_mod_poly_set_length(G, lenG); lenS = FLINT_MAX(lenB - lenG, 1); lenT = FLINT_MAX(lenA - lenG, 1); FMPZ_VEC_NORM(S->coeffs, lenS); FMPZ_VEC_NORM(T->coeffs, lenT); _fmpz_mod_poly_set_length(S, lenS); _fmpz_mod_poly_set_length(T, lenT); if (!fmpz_is_one(G->coeffs + lenG - 1)) { fmpz_invmod(inv, G->coeffs + lenG - 1, p); fmpz_mod_poly_scalar_mul_fmpz(G, G, inv, ctx); fmpz_mod_poly_scalar_mul_fmpz(S, S, inv, ctx); fmpz_mod_poly_scalar_mul_fmpz(T, T, inv, ctx); } } fmpz_clear(inv); } } #undef __set #undef __add #undef __sub #undef __mul #undef __divrem #undef __div