/* Modular multiplication. Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Paul Zimmermann, Alexander Kruppa and Cyril Bouvier. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "ecm-gmp.h" #include "ecm-impl.h" #include "mpmod.h" #ifdef USE_ASM_REDC #include "mulredc.h" #endif FILE *ECM_STDOUT, *ECM_STDERR; /* define them here since needed in tune.c */ /* define WANT_ASSERT to check normalization of residues */ /* #define WANT_ASSERT 1 */ /* #define DEBUG */ /* #define WANT_ASSERT_EXPENSIVE 1 */ #define ASSERT_NORMALIZED(x) ASSERT ((modulus->repr != ECM_MOD_MODMULN && \ modulus->repr != ECM_MOD_REDC) || \ mpz_size (x) <= mpz_size (modulus->orig_modulus)) #define MPZ_NORMALIZED(x) ASSERT (PTR(x)[ABSIZ(x)-1] != 0) static void ecm_redc_basecase (mpz_ptr, mpz_ptr, mpmod_t) ATTRIBUTE_HOT; static void ecm_mulredc_basecase (mpres_t, const mpres_t, const mpres_t, mpmod_t) ATTRIBUTE_HOT; static void base2mod (mpres_t, const mpres_t, mpres_t, mpmod_t) ATTRIBUTE_HOT; static void REDC (mpres_t, const mpres_t, mpz_t, mpmod_t); /* returns +/-l if n is a factor of N = 2^l +/- 1 with N <= n^threshold, 0 otherwise. */ int isbase2 (const mpz_t n, const double threshold) { unsigned int k, lo; int res = 0; mpz_t u, w; mpz_init (u); mpz_init (w); lo = mpz_sizeinbase (n, 2) - 1; /* 2^lo <= n < 2^(lo+1) */ mpz_set_ui (u, 1UL); mpz_mul_2exp (u, u, 2UL * lo); mpz_mod (w, u, n); /* 2^(2lo) mod n = -/+2^(2lo-l) if m*n = 2^l+/-1 */ if (mpz_cmp_ui (w, 1UL) == 0) /* if 2^(2lo) mod n = 1, then n divides 2^(2lo)-1. If algebraic factors have been removed, n divides either 2^lo+1 or 2^lo-1. But since n has lo+1 bits, n can only divide 2^lo+1. More precisely, n must be 2^lo+1. */ { /* check that n equals 2^lo+1. Since n divides 2^(2lo)-1, n is odd. */ if (mpz_scan1 (n, 1UL) != lo) lo = 0; mpz_clear (w); mpz_clear (u); return lo; } k = mpz_sizeinbase (w, 2) - 1; /* if w = 2^k then n divides 2^(2*lo-k)-1 */ mpz_set_ui (u, 1UL); mpz_mul_2exp (u, u, k); if (mpz_cmp(w, u) == 0) res = k - 2 * lo; else /* if w = -2^k then n divides 2^(2*lo-k)+1 */ { mpz_neg (w, w); mpz_mod (w, w, n); k = mpz_sizeinbase (w, 2) - 1; mpz_set_ui (u, 1UL); mpz_mul_2exp (u, u, k); if (mpz_cmp (w, u) == 0) res = 2 * lo - k; } mpz_clear (u); mpz_clear (w); if (abs (res) > (int) (threshold * (double) lo)) res = 0; if (abs (res) < 16) res = 0; return res; } /* Do base-2 reduction. R must not equal S or t. */ static void base2mod (mpres_t R, const mpres_t S, mpres_t t, mpmod_t modulus) { unsigned long absbits = abs (modulus->bits); ASSERT (R != S && R != t); mpz_tdiv_q_2exp (R, S, absbits); mpz_tdiv_r_2exp (t, S, absbits); if (modulus->bits < 0) mpz_add (R, R, t); else mpz_sub (R, t, R); /* mpz_mod (R, R, modulus->orig_modulus); */ while (mpz_sizeinbase (R, 2) > absbits) { mpz_tdiv_q_2exp (t, R, absbits); mpz_tdiv_r_2exp (R, R, absbits); if (modulus->bits < 0) mpz_add (R, R, t); else mpz_sub (R, R, t); } } /* Modular reduction modulo the Fermat number 2^m+1. n = m / GMP_NUMB_BITS. Result is < 2^m+1. FIXME: this does not work with nails. Only copies the data to R if reduction is needed and returns 1 in that case. If the value in S is reduced already, nothing is done and 0 is returned. Yes, this is ugly. */ static int base2mod_2 (mpres_t R, const mpres_t S, mp_size_t n, mpz_t modulus) { mp_size_t s; s = ABSIZ(S); if (s > n) { if (s == n + 1) { mp_srcptr sp = PTR(S); mp_ptr rp; MPZ_REALLOC (R, s); rp = PTR(R); if ((rp[n] = mpn_sub_1 (rp, sp, n, sp[n]))) rp[n] = mpn_add_1 (rp, rp, n, rp[n]); MPN_NORMALIZE(rp, s); ASSERT (s <= n || (s == n && rp[n] == 1)); SIZ(R) = (SIZ(S) > 0) ? (int) s : (int) -s; } else /* should happen rarely */ mpz_mod (R, S, modulus); return 1; } return 0; } /* subquadratic REDC, at mpn level. {orig,n} is the original modulus. Requires xn = 2n or 2n-1 and ABSIZ(orig_modulus)=n. */ static void ecm_redc_n (mp_ptr rp, mp_srcptr x0p, mp_size_t xn, mp_srcptr orig, mp_srcptr invm, mp_size_t n) { mp_ptr tp, up, xp; mp_size_t nn = n + n; mp_limb_t cy, cin; TMP_DECL; ASSERT((xn == nn) || (xn == nn - 1)); TMP_MARK; up = TMP_ALLOC_LIMBS(nn + nn); if (xn < nn) { xp = TMP_ALLOC_LIMBS(nn); MPN_COPY (xp, x0p, xn); xp[nn - 1] = 0; } else xp = (mp_ptr) x0p; #ifdef HAVE___GMPN_MULLO_N /* available up from GMP 5.0.0 */ __gmpn_mullo_n (up, xp, invm, n); #else ecm_mul_lo_n (up, xp, invm, n); #endif tp = up + nn; mpn_mul_n (tp, up, orig, n); /* add {x, 2n} and {tp, 2n}. We know that {tp, n} + {xp, n} will give either 0, or a carry out. If xp[n-1] <> 0 or tp[n-1] <> 0, then there is a carry. We use a binary OR, which sets the zero flag if and only if both operands are zero. */ cin = (mp_limb_t) ((xp[n - 1] | tp[n - 1]) ? 1 : 0); // Disabled to compile with GMP 6.3 // #ifdef HAVE___GMPN_ADD_NC // cy = __gmpn_add_nc (rp, tp + n, xp + n, n, cin); // #else cy = mpn_add_n (rp, tp + n, xp + n, n); cy += mpn_add_1 (rp, rp, n, cin); // #endif /* since we add at most N-1 to the upper half of {x0p,2n}, one adjustment is enough */ if (cy) cy -= mpn_sub_n (rp, rp, orig, n); ASSERT (cy == 0); TMP_FREE; } /* REDC. x and t must not be identical, t has limb growth */ /* subquadratic REDC, at mpz level */ static void REDC (mpres_t r, const mpres_t x, mpz_t t, mpmod_t modulus) { mp_size_t n = modulus->bits / GMP_NUMB_BITS; mp_size_t xn = ABSIZ(x); ASSERT (xn <= 2 * n); if (xn == 2 * n) /* ecm_redc_n also accepts xn=2n-1, but this seems slower for now (see remark in TODO) */ { mp_ptr rp; MPZ_REALLOC (r, n); rp = PTR(r); ecm_redc_n (rp, PTR(x), xn, PTR(modulus->orig_modulus), PTR(modulus->aux_modulus), n); MPN_NORMALIZE(rp, n); SIZ(r) = (SIZ(x) > 0) ? (int) n : (int) -n; MPZ_NORMALIZED (r); } else { mpz_tdiv_r_2exp (t, x, modulus->bits); mpz_mul (t, t, modulus->aux_modulus); mpz_tdiv_r_2exp (t, t, modulus->bits); /* t = (x % R) * 1/N (mod R) */ mpz_mul (t, t, modulus->orig_modulus); /* t <= (R-1)*N */ mpz_add (t, t, x); /* t < (R-1)*N + R^2/B */ mpz_tdiv_q_2exp (r, t, modulus->bits); /* r = (x + m*N) / R < N + R/B */ if (ABSIZ (r) > n) /* this can happen in practice but is extremely unlikely: in particular it requires that the upper limb of N has only ones */ mpz_sub (r, r, modulus->multiple); } ASSERT (ABSIZ(r) <= n); } /* Quadratic time redc for n word moduli. */ static inline void redc_basecase_n (mp_ptr rp, mp_ptr cp, mp_srcptr np, const mp_size_t nn, const mp_ptr invm) { #ifdef HAVE___GMPN_REDC_2 REDC2(rp, cp, np, nn, invm); #else /* HAVE___GMPN_REDC_2 is not defined */ #ifdef HAVE___GMPN_REDC_1 REDC1(rp, cp, np, nn, invm[0]); #else /* neither HAVE___GMPN_REDC_2 nor HAVE___GMPN_REDC_1 is defined */ mp_limb_t cy; mp_size_t j; for (j = 0; j < nn; j++) { cy = mpn_addmul_1 (cp, np, nn, cp[0] * invm[0]); ASSERT(cp[0] == (mp_limb_t) 0); cp[0] = cy; cp++; } /* add vector of carries and shift */ cy = mpn_add_n (rp, cp, cp - nn, nn); /* the result of Montgomery's REDC is less than 2^Nbits + N, thus at most one correction is enough */ if (cy != 0) { mp_limb_t t; t = mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */ ASSERT (t == 1); } #endif /* HAVE___GMPN_REDC_1 */ #endif /* HAVE___GMPN_REDC_2 */ } /* r <- c/R^nn mod n, where n has nn limbs, and R=2^GMP_NUMB_BITS. n must be odd. c must have space for at least 2*nn limbs. r must have space for at least n limbs. c and r can be the same variable. The data in c is clobbered. */ static void ecm_redc_basecase (mpz_ptr r, mpz_ptr c, mpmod_t modulus) { mp_ptr rp; mp_ptr cp; mp_srcptr np; mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ABSIZ(c) <= 2 * nn); ASSERT(ALLOC(c) >= 2 * nn); ASSERT(ALLOC(r) >= nn); cp = PTR(c); rp = PTR(r); np = PTR(modulus->orig_modulus); for (j = ABSIZ(c); j < 2 * nn; j++) cp[j] = 0; redc_basecase_n (rp, cp, np, nn, modulus->Nprim); MPN_NORMALIZE (rp, nn); SIZ(r) = SIZ(c) < 0 ? (int) -nn : (int) nn; } #ifdef USE_ASM_REDC /* Quadratic time multiplication and REDC with nn-limb modulus. x and y are nn-limb residues, the nn-limb result is written to z. This function merely calls the correct mulredc*() assembly function depending on nn, and processes any leftover carry. */ static void mulredc (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_srcptr m, const mp_size_t nn, const mp_limb_t invm) { mp_limb_t cy; switch (nn) { case 1: cy = mulredc1(z, x[0], y[0], m[0], invm); break; case 2: cy = mulredc2(z, x, y, m, invm); break; case 3: cy = mulredc3(z, x, y, m, invm); break; case 4: cy = mulredc4(z, x, y, m, invm); break; case 5: cy = mulredc5(z, x, y, m, invm); break; case 6: cy = mulredc6(z, x, y, m, invm); break; case 7: cy = mulredc7(z, x, y, m, invm); break; case 8: cy = mulredc8(z, x, y, m, invm); break; case 9: cy = mulredc9(z, x, y, m, invm); break; case 10: cy = mulredc10(z, x, y, m, invm); break; case 11: cy = mulredc11(z, x, y, m, invm); break; case 12: cy = mulredc12(z, x, y, m, invm); break; case 13: cy = mulredc13(z, x, y, m, invm); break; case 14: cy = mulredc14(z, x, y, m, invm); break; case 15: cy = mulredc15(z, x, y, m, invm); break; case 16: cy = mulredc16(z, x, y, m, invm); break; case 17: cy = mulredc17(z, x, y, m, invm); break; case 18: cy = mulredc18(z, x, y, m, invm); break; case 19: cy = mulredc19(z, x, y, m, invm); break; case 20: cy = mulredc20(z, x, y, m, invm); break; default: abort(); } /* the result of Montgomery's REDC is less than 2^Nbits + N, thus at most one correction is enough */ if (cy != 0) { ATTRIBUTE_UNUSED mp_limb_t t; t = mpn_sub_n (z, z, m, nn); /* a borrow should always occur here */ ASSERT (t == 1); } } #if 0 /* {rp, n} <- {ap, n}^2/B^n mod {np, n} where B = 2^GMP_NUMB_BITS */ static void sqrredc (mp_ptr rp, mp_srcptr ap, mp_srcptr np, const mp_size_t n, const mp_limb_t invm) { mp_ptr cp; mp_size_t i; mp_limb_t cy, q; TMP_DECL; TMP_MARK; cp = TMP_ALLOC_LIMBS(2*n); for (i = 0; i < n; i++) umul_ppmm (cp[2*i+1], cp[2*i], ap[i], ap[i]); if (UNLIKELY(n == 1)) { q = cp[0] * invm; rp[0] = mpn_addmul_1 (cp, np, 1, q); cy = mpn_add_n (rp, rp, cp + 1, 1); goto end_sqrredc; } if (cp[0] & (mp_limb_t) 1) /* cp[n] is either some ap[i]^2 mod B or floor(ap[i]^2/B), the latter is at most floor((B-1)^2/B) = B-2, and the former cannot be B-1 since -1 is not a square mod 2^n for n >1, thus there is no carry in cp[n] + ... below */ cp[n] += mpn_add_n (cp, cp, np, n); /* now {cp, 2n} is even: divide by two */ mpn_rshift (cp, cp, 2*n, 1); /* now cp[2n-1] is at most B/2-1 */ for (i = 0; i < n - 1; i++) { q = cp[i] * invm; cp[i] = mpn_addmul_1 (cp + i, np, n, q); /* accumulate ap[i+1..n-1] * ap[i] */ rp[i] = mpn_addmul_1 (cp + 2 * i + 1, ap + i + 1, n - 1 - i, ap[i]); } /* the last iteration did set cp[n-2] to zero, accumulated a[n-1] * a[n-2] */ /* cp[2n-1] was untouched so far, so it is still at most B/2-1 */ q = cp[n-1] * invm; rp[n-1] = mpn_addmul_1 (cp + n - 1, np, n, q); /* rp[n-1] <= floor((B^n-1)*(B-1)/B^n)<=B-2 */ /* now add {rp, n}, {cp+n, n} and {cp, n-1} */ /* cp[2n-1] still <= B/2-1 */ rp[n-1] += mpn_add_n (rp, rp, cp, n-1); /* no overflow in rp[n-1] + ... */ cy = mpn_add_n (rp, rp, cp + n, n); /* multiply by 2 */ cy = (cy << 1) + mpn_lshift (rp, rp, n, 1); end_sqrredc: while (cy) cy -= mpn_sub_n (rp, rp, np, n); TMP_FREE; } #endif #ifdef HAVE_NATIVE_MULREDC1_N /* Multiplies y by the 1-limb value of x and does modulo reduction. The resulting residue may be multiplied by some constant, which makes this function useful only for cases where, e.g., all projective coordinates are multiplied by the same constant. More precisely it computes: {z, N} = {y, N} * x / 2^GMP_NUMB_BITS mod {m, N} */ static void mulredc_1 (mp_ptr z, const mp_limb_t x, mp_srcptr y, mp_srcptr m, const mp_size_t N, const mp_limb_t invm) { mp_limb_t cy; switch (N) { case 1: cy = mulredc1(z, x, y[0], m[0], invm); break; case 2: cy = mulredc1_2(z, x, y, m, invm); break; case 3: cy = mulredc1_3(z, x, y, m, invm); break; case 4: cy = mulredc1_4(z, x, y, m, invm); break; case 5: cy = mulredc1_5(z, x, y, m, invm); break; case 6: cy = mulredc1_6(z, x, y, m, invm); break; case 7: cy = mulredc1_7(z, x, y, m, invm); break; case 8: cy = mulredc1_8(z, x, y, m, invm); break; case 9: cy = mulredc1_9(z, x, y, m, invm); break; case 10: cy = mulredc1_10(z, x, y, m, invm); break; case 11: cy = mulredc1_11(z, x, y, m, invm); break; case 12: cy = mulredc1_12(z, x, y, m, invm); break; case 13: cy = mulredc1_13(z, x, y, m, invm); break; case 14: cy = mulredc1_14(z, x, y, m, invm); break; case 15: cy = mulredc1_15(z, x, y, m, invm); break; case 16: cy = mulredc1_16(z, x, y, m, invm); break; case 17: cy = mulredc1_17(z, x, y, m, invm); break; case 18: cy = mulredc1_18(z, x, y, m, invm); break; case 19: cy = mulredc1_19(z, x, y, m, invm); break; case 20: cy = mulredc1_20(z, x, y, m, invm); break; default: { abort (); } } /* the result of Montgomery's REDC is less than 2^Nbits + N, thus one correction (at most) is enough */ if (cy != 0) { ATTRIBUTE_UNUSED mp_limb_t t; t = mpn_sub_n (z, z, m, N); /* a borrow should always occur here */ ASSERT (t == 1); } } #endif /* ifdef HAVE_NATIVE_MULREDC1_N */ #endif static int tune_mulredc_table[] = TUNE_MULREDC_TABLE; static int tune_sqrredc_table[] = TUNE_SQRREDC_TABLE; static void ecm_mulredc_basecase_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp) { mp_limb_t cy; mp_size_t j; if (nn <= MULREDC_ASSEMBLY_MAX) { switch (tune_mulredc_table[nn]) { case MPMOD_MULREDC: /* use quadratic assembly mulredc */ #ifdef USE_ASM_REDC mulredc (rp, s1p, s2p, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC1: /* mpn_mul_n + __gmpn_redc_1 */ #ifdef HAVE___GMPN_REDC_1 mpn_mul_n (tmp, s1p, s2p, nn); REDC1(rp, tmp, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC2: /* mpn_mul_n + __gmpn_redc_2 */ #ifdef HAVE___GMPN_REDC_2 mpn_mul_n (tmp, s1p, s2p, nn); REDC2(rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDCN: /* mpn_mul_n + __gmpn_redc_n */ /* disable redc_n for now, since it uses the opposite precomputed inverse wrt redc_1 and redc_2 */ #ifdef HAVE___GMPN_REDC_Nxxx mpn_mul_n (tmp, s1p, s2p, nn); __gmpn_redc_n (rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */ mpn_mul_n (tmp, s1p, s2p, nn); for (j = 0; j < nn; j++, tmp++) tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]); cy = mpn_add_n (rp, tmp - nn, tmp, nn); if (cy != 0) mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */ break; default: { outputf (OUTPUT_ERROR, "Invalid mulredc mode: %d\n", tune_mulredc_table[nn]); exit (EXIT_FAILURE); } } } else /* nn > MULREDC_ASSEMBLY_MAX */ { mpn_mul_n (tmp, s1p, s2p, nn); ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn); } } static void ecm_sqrredc_basecase_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp) { mp_limb_t cy; mp_size_t j; if (nn <= MULREDC_ASSEMBLY_MAX) { switch (tune_sqrredc_table[nn]) { case MPMOD_MULREDC: /* use quadratic assembly mulredc */ #ifdef USE_ASM_REDC mulredc (rp, s1p, s1p, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC1: /* mpn_sqr + __gmpn_redc_1 */ #ifdef HAVE___GMPN_REDC_1 mpn_sqr (tmp, s1p, nn); REDC1(rp, tmp, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC2: /* mpn_sqr + __gmpn_redc_2 */ #ifdef HAVE___GMPN_REDC_2 mpn_sqr (tmp, s1p, nn); REDC2(rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDCN: /* mpn_sqr + __gmpn_redc_n */ /* disable redc_n for now, since it uses the opposite precomputed inverse wrt redc_1 and redc_2 */ #ifdef HAVE___GMPN_REDC_Nxxx mpn_sqr (tmp, s1p, nn); __gmpn_redc_n (rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */ mpn_sqr (tmp, s1p, nn); for (j = 0; j < nn; j++, tmp++) tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]); cy = mpn_add_n (rp, tmp - nn, tmp, nn); if (cy != 0) mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */ break; default: { outputf (OUTPUT_ERROR, "Invalid sqrredc mode: %d\n", tune_sqrredc_table[nn]); exit (EXIT_FAILURE); } } } else /* nn > MULREDC_ASSEMBLY_MAX */ { mpn_sqr (tmp, s1p, nn); ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn); } } /* R <- S1 * S2 mod modulus i.e. R <- S1*S2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS. Same as ecm_redc_basecase previous, but combined with mul Neither input argument must be in modulus->temp1 */ static void ecm_mulredc_basecase (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr s1p, s2p, rp = PTR(R); mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ALLOC(R) >= nn); ASSERT(ALLOC(S1) >= nn); ASSERT(ALLOC(S2) >= nn); s1p = PTR(S1); s2p = PTR(S2); /* FIXME: S1 and S2 are input and marked const, we mustn't write to them */ for (j = ABSIZ(S1); j < nn; j++) s1p[j] = 0; for (j = ABSIZ(S2); j < nn; j++) s2p[j] = 0; ecm_mulredc_basecase_n (rp, s1p, s2p, PTR(modulus->orig_modulus), nn, modulus->Nprim, PTR(modulus->temp1)); MPN_NORMALIZE (rp, nn); SIZ(R) = (SIZ(S1)*SIZ(S2)) < 0 ? (int) -nn : (int) nn; } /* R <- S1^2 mod modulus i.e. R <- S1^2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS. Same as ecm_redc_basecase previous, but combined with sqr The input argument must not be in modulus->temp1 */ static void ecm_sqrredc_basecase (mpres_t R, const mpres_t S1, mpmod_t modulus) { mp_ptr rp; mp_ptr s1p; mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ALLOC(R) >= nn); ASSERT(ALLOC(S1) >= nn); rp = PTR(R); s1p = PTR(S1); /* FIXME: S1 is input and marked const, we mustn't write to it */ for (j = ABSIZ(S1); j < nn; j++) s1p[j] = 0; ecm_sqrredc_basecase_n (rp, s1p, PTR(modulus->orig_modulus), nn, modulus->Nprim, PTR(modulus->temp1)); MPN_NORMALIZE (rp, nn); SIZ(R) = (int) nn; } /* If the user asked for a particular representation, always use it. If repr = ECM_MOD_DEFAULT, use the thresholds. Don't use base2 if repr = ECM_MOD_NOBASE2. If a value is <= -16 or >= 16, it is a base2 exponent. Return a non-zero value if an error occurred. */ int mpmod_init (mpmod_t modulus, const mpz_t N, int repr) { int base2 = 0, r = 0; mp_size_t n = mpz_size (N); switch (repr) { case ECM_MOD_DEFAULT: if ((base2 = isbase2 (N, BASE2_THRESHOLD))) { repr = ECM_MOD_BASE2; break; } /* else go through */ #if defined( __GNUC__ ) && __GNUC__ >= 7 && !defined(__ICC) __attribute__ ((fallthrough)); #endif case ECM_MOD_NOBASE2: if (mpz_size (N) < MPZMOD_THRESHOLD) repr = ECM_MOD_MODMULN; else if (mpz_size (N) < REDC_THRESHOLD) repr = ECM_MOD_MPZ; else repr = ECM_MOD_REDC; } /* now repr is {ECM_MOD_BASE2, ECM_MOD_MODMULN, ECM_MOD_MPZ, ECM_MOD_REDC}, or |repr| >= 16. */ switch (repr) { case ECM_MOD_MPZ: outputf (OUTPUT_VERBOSE, "Using mpz_mod\n"); mpmod_init_MPZ (modulus, N); break; case ECM_MOD_MODMULN: outputf (OUTPUT_VERBOSE, "Using MODMULN [mulredc:%d, sqrredc:%d]\n", (n <= MULREDC_ASSEMBLY_MAX) ? tune_mulredc_table[n] : 4, (n <= MULREDC_ASSEMBLY_MAX) ? tune_sqrredc_table[n] : 4); mpmod_init_MODMULN (modulus, N); break; case ECM_MOD_REDC: outputf (OUTPUT_VERBOSE, "Using REDC\n"); mpmod_init_REDC (modulus, N); break; default: /* base2 case: either repr=ECM_MOD_BASE2, and base2 was determined above, or |repr| >= 16, and we want base2 = repr */ if (repr != ECM_MOD_BASE2) base2 = repr; r = mpmod_init_BASE2 (modulus, base2, N); break; } return r; } void mpres_clear (mpres_t a, ATTRIBUTE_UNUSED const mpmod_t modulus) { mpz_clear (a); PTR(a) = NULL; /* Make sure we segfault if we access it again */ } void mpmod_init_MPZ (mpmod_t modulus, const mpz_t N) { size_t n; mpz_init_set (modulus->orig_modulus, N); modulus->repr = ECM_MOD_MPZ; n = mpz_size (N); /* number of limbs of N */ modulus->bits = n * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ mpz_init2 (modulus->temp1, 2UL * modulus->bits + GMP_NUMB_BITS); mpz_init2 (modulus->temp2, modulus->bits); mpz_init2 (modulus->aux_modulus, modulus->bits); mpz_set_ui (modulus->aux_modulus, 1UL); /* we precompute B^(n + ceil(n/2)) mod N, where B=2^GMP_NUMB_BITS */ mpz_mul_2exp (modulus->aux_modulus, modulus->aux_modulus, (n + (n + 1) / 2) * GMP_NUMB_BITS); mpz_mod (modulus->aux_modulus, modulus->aux_modulus, N); return; } int mpmod_init_BASE2 (mpmod_t modulus, const int base2, const mpz_t N) { int Nbits; outputf (OUTPUT_VERBOSE, "Using special division for factor of 2^%d%c1\n", abs (base2), (base2 < 0) ? '-' : '+'); mpz_init_set (modulus->orig_modulus, N); modulus->repr = ECM_MOD_BASE2; modulus->bits = base2; Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ mpz_init2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS); mpz_init2 (modulus->temp2, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, abs (base2)); if (base2 < 0) mpz_sub_ui (modulus->temp1, modulus->temp1, 1UL); else mpz_add_ui (modulus->temp1, modulus->temp1, 1UL); if (!mpz_divisible_p (modulus->temp1, N)) { outputf (OUTPUT_ERROR, "mpmod_init_BASE2: n does not divide 2^%d%c1\n", abs (base2), base2 < 0 ? '-' : '+'); mpz_clear (modulus->temp2); mpz_clear (modulus->temp1); mpz_clear (modulus->orig_modulus); return ECM_ERROR; } modulus->Fermat = 0; if (base2 > 0) { unsigned long i; for (i = base2; (i & 1) == 0; i >>= 1); if (i == 1) { modulus->Fermat = base2; } } return 0; } /* initialize the following fields: orig_modulus - the original modulus bits - # of bits of N, rounded up to a multiple of GMP_NUMB_BITS temp1, temp2 - auxiliary variables Nprim - -1/N mod B^n where B=2^GMP_NUMB_BITS and n = #limbs(N) R2 - (2^bits)^2 (mod N) R3 - (2^bits)^3 (mod N) multiple - smallest multiple of N >= 2^bits */ void mpmod_init_MODMULN (mpmod_t modulus, const mpz_t N) { int Nbits; mpz_init_set (modulus->orig_modulus, N); modulus->repr = ECM_MOD_MODMULN; Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ modulus->bits = Nbits; mpz_init2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS); mpz_init2 (modulus->temp2, Nbits + 1); modulus->Nprim = (mp_limb_t*) malloc (mpz_size (N) * sizeof (mp_limb_t)); mpz_init2 (modulus->R2, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits); mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus); /* Now R2 = (2^bits)^2 (mod N) */ mpz_init2 (modulus->R3, Nbits); mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits); mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus); /* Now R3 = (2^bits)^3 (mod N) */ mpz_init2 (modulus->multiple, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* compute ceil(2^bits / N) */ mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus); mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus); /* Now multiple is the smallest multiple of N >= 2^bits */ mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* since we directly check even modulus in ecm/pm1/pp1, N is odd here, thus 1/N mod 2^Nbits always exist */ mpz_invert (modulus->temp2, N, modulus->temp1); /* temp2 = 1/N mod B^n */ mpz_sub (modulus->temp2, modulus->temp1, modulus->temp2); /* temp2 = -1/N mod B^n */ /* ensure Nprim has all its n limbs correctly set, for ecm_redc_n */ MPN_ZERO(modulus->Nprim, mpz_size (N)); mpn_copyi (modulus->Nprim, PTR(modulus->temp2), ABSIZ(modulus->temp2)); } void mpmod_init_REDC (mpmod_t modulus, const mpz_t N) { mp_size_t n; int Nbits; mpz_init_set (modulus->orig_modulus, N); n = mpz_size (N); modulus->repr = ECM_MOD_REDC; Nbits = n * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ modulus->bits = Nbits; mpz_init2 (modulus->temp1, 2 * Nbits + GMP_NUMB_BITS); mpz_init2 (modulus->temp2, Nbits); mpz_init2 (modulus->aux_modulus, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* since we directly check even modulus in ecm/pm1/pp1, N is odd here, thus 1/N mod 2^Nbits always exist */ mpz_invert (modulus->aux_modulus, N, modulus->temp1); mpz_sub (modulus->aux_modulus, modulus->temp1, modulus->aux_modulus); /* ensure aux_modulus has n allocated limbs, for ecm_redc_n */ if (ABSIZ(modulus->aux_modulus) < n) { _mpz_realloc (modulus->aux_modulus, n); /* in case the reallocation fails, _mpz_realloc sets the value to 0 */ ASSERT_ALWAYS (mpz_cmp_ui (modulus->aux_modulus, 0) != 0); MPN_ZERO (PTR(modulus->aux_modulus) + ABSIZ(modulus->aux_modulus), n - ABSIZ(modulus->aux_modulus)); } mpz_init2 (modulus->R2, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits); mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus); /* Now R2 = (2^bits)^2 (mod N) */ mpz_init2 (modulus->R3, Nbits); mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits); mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus); /* Now R3 = (2^bits)^3 (mod N) */ mpz_init (modulus->multiple); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* compute ceil(2^bits / N) */ mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus); mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus); /* Now multiple is the largest multiple of N >= 2^bits */ } void mpmod_clear (mpmod_t modulus) { mpz_clear (modulus->orig_modulus); mpz_clear (modulus->temp1); mpz_clear (modulus->temp2); if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ) mpz_clear (modulus->aux_modulus); if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_clear (modulus->R2); mpz_clear (modulus->R3); mpz_clear (modulus->multiple); } if (modulus->repr == ECM_MOD_MODMULN) free (modulus->Nprim); return; } /* initialize r and set all entries from those of modulus */ void mpmod_init_set (mpmod_t r, const mpmod_t modulus) { const unsigned long Nbits = abs(modulus->bits); const unsigned long n = mpz_size (modulus->orig_modulus); r->repr = modulus->repr; r->bits = modulus->bits; r->Fermat = modulus->Fermat; mpz_init_set (r->orig_modulus, modulus->orig_modulus); mpz_init2 (r->temp1, 2 * Nbits + GMP_NUMB_BITS); mpz_init2 (r->temp2, Nbits + GMP_NUMB_BITS); if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_init2 (r->multiple, Nbits); mpz_init2 (r->R2, Nbits); mpz_init2 (r->R3, Nbits); mpz_set (r->multiple, modulus->multiple); mpz_set (r->R2, modulus->R2); mpz_set (r->R3, modulus->R3); } if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ) { mpz_init2 (r->aux_modulus, Nbits); mpz_set (r->aux_modulus, modulus->aux_modulus); /* for ECM_MOD_REDC, ensure r->aux_modulus has n limbs */ _mpz_realloc (r->aux_modulus, n); MPN_ZERO (PTR(r->aux_modulus) + ABSIZ(r->aux_modulus), n - ABSIZ(r->aux_modulus)); } if (modulus->repr == ECM_MOD_MODMULN) { r->Nprim = (mp_limb_t*) malloc (n * sizeof (mp_limb_t)); mpn_copyi (r->Nprim, modulus->Nprim, n); } } void mpres_init (mpres_t R, const mpmod_t modulus) { /* use mpz_sizeinbase since modulus->bits may not be initialized yet */ mpz_init2 (R, mpz_sizeinbase (modulus->orig_modulus, 2) + GMP_NUMB_BITS); } /* realloc R so that it has at least the same number of limbs as modulus */ void mpres_realloc (mpres_t R, const mpmod_t modulus) { if (modulus->repr == ECM_MOD_MODMULN) MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); } /* a <- b^2 mod (modulus). a and b might be equal, but cannot be modulus->temp1. Assumes repr = ECM_MOD_BASE2 or ECM_MOD_MODMULN or ECM_MOD_REDC */ static inline void mpres_pow_sqr (mpres_t a, const mpres_t b, mpmod_t modulus) { /* Set temp2 = temp2*temp2 */ if (modulus->repr == ECM_MOD_BASE2) { mpz_mul (modulus->temp1, b, b); base2mod (a, modulus->temp1, modulus->temp1, modulus); } else if (modulus->repr == ECM_MOD_MODMULN) ecm_sqrredc_basecase (a, b, modulus); else { ASSERT(modulus->repr == ECM_MOD_REDC); mpz_mul (modulus->temp1, b, b); REDC (a, modulus->temp1, modulus->temp2, modulus); } } /* a <- b*c mod (modulus) a, b, c must not equal modulus->temp1. Assumes repr = ECM_MOD_BASE2 or ECM_MOD_MODMULN or ECM_MOD_REDC */ static inline void mpres_pow_mul (mpres_t a, const mpres_t b, const mpres_t c, mpmod_t modulus) { /* Set temp2 = temp2*temp2 */ if (modulus->repr == ECM_MOD_BASE2) { mpz_mul (modulus->temp1, b, c); base2mod (a, modulus->temp1, modulus->temp1, modulus); } else if (modulus->repr == ECM_MOD_MODMULN) ecm_mulredc_basecase (a, b, c, modulus); else { ASSERT(modulus->repr == ECM_MOD_REDC); mpz_mul (modulus->temp1, b, c); REDC (a, modulus->temp1, modulus->temp2, modulus); } } /* R <- BASE^EXP mod modulus. Warning: EXP can be negative (only occurs for P-1 in revision 2982). */ void mpres_pow (mpres_t R, const mpres_t BASE, const mpz_t EXP, mpmod_t modulus) { size_t k, expnbits, K, lw, n0, i; mpres_t *B; mp_limb_t w; ASSERT_NORMALIZED(BASE); if (modulus->repr == ECM_MOD_MPZ) mpz_powm (R, BASE, EXP, modulus->orig_modulus); else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { size_t expidx; mp_limb_t bitmask, expbits; int sgn = mpz_sgn (EXP); mpz_t exp; /* case EXP=0 */ if (sgn == 0) { mpres_set_ui (R, 1UL, modulus); /* set result to 1 */ ASSERT_NORMALIZED (R); return; } /* exp <- |EXP| */ PTR(exp) = PTR(EXP); SIZ(exp) = ABSIZ(EXP); expidx = mpz_size (exp) - 1; /* point at most significant limb */ expbits = mpz_getlimbn (exp, expidx); /* get most significant limb */ ASSERT (expbits != 0); /* Scan for the MSB in expbits */ bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1); for (; (bitmask & expbits) == 0; bitmask >>= 1); /* here the most significant limb with any set bits is in expbits, */ /* bitmask is set to mask in the msb of expbits */ k = 1; /* sliding window size */ expnbits = mpz_sizeinbase (exp, 2); /* the average number of multiplications is 2^(k-1) + expnbits / (k+1) */ while ((1 << (k-1)) + expnbits / (k + 1) > (1 << k) + expnbits / (k + 2)) k ++; /* precompute BASE^i for i = 2, 3, 5, ..., 2^k-1 */ K = 1UL << (k - 1); B = malloc (K * sizeof (mpres_t)); for (i = 0; i < K; i++) { mpres_init (B[i], modulus); mpres_realloc (B[i], modulus); if (i == 0) /* store BASE^2 in B[0] */ mpres_pow_sqr (B[i], BASE, modulus); else if (i == 1) mpres_pow_mul (B[i], B[0], BASE, modulus); else mpres_pow_mul (B[i], B[i-1], B[0], modulus); } mpz_set (modulus->temp2, BASE); bitmask >>= 1; w = 0; /* invariant: temp2 has to be multiplied by BASE^w */ lw = 0; /* number of bits in w */ expnbits --; /* number of remaining bits to deal with */ n0 = 0; /* smallest bit index of current window */ while (1) { for ( ; bitmask != 0; bitmask >>= 1, expnbits--) { /* Set temp2 = temp2 ^ 2 */ mpres_pow_sqr (modulus->temp2, modulus->temp2, modulus); w = w << 1; lw += (lw != 0); /* If bit is 1, accumulate in w */ if (expbits & bitmask) { w ++; if (lw == 0) { lw = 1; /* n0 points to the smallest bit of the current window */ n0 = (expnbits >= k) ? expnbits - k : 0; } /* if the current bit is the smallest one of the window, we multiply by BASE^w */ if (mpz_scan1 (exp, n0) == expnbits - 1) { ASSERT(w/2 < K); mpres_pow_mul (modulus->temp2, (w == 1) ? BASE : B[w/2], modulus->temp2, modulus); w = lw = 0; } } } if (expidx == 0) /* if we just processed the least */ break; /* significant limb, we are done */ expidx --; expbits = mpz_getlimbn (exp, expidx); bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1); } ASSERT(w/2 < K); if (w != 0) mpres_pow_mul (modulus->temp2, (w == 1) ? BASE : B[w/2], modulus->temp2, modulus); for (i = 0; i < K; i++) mpres_clear (B[i], modulus); free (B); mpz_set (R, modulus->temp2); /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|. If EXP was negative, do a modular inverse */ if (sgn < 0) mpres_invert (R, R, modulus); } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */ ASSERT_NORMALIZED (R); } /* Returns 1 if S == 0 (mod modulus), 0 otherwise */ int mpres_is_zero (const mpres_t S, mpmod_t modulus) { mpz_mod (modulus->temp1, S, modulus->orig_modulus); /* For all currently implemented representations, a zero residue has zero integer representation */ return (mpz_sgn (modulus->temp1) == 0) ? 1 : 0; } /* R <- BASE^EXP mod modulus */ void mpres_ui_pow (mpres_t R, const unsigned long BASE, const mpres_t EXP, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ) { mpz_set_ui (modulus->temp1, BASE); mpz_powm (R, modulus->temp1, EXP, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { size_t expidx; mp_limb_t bitmask, expbits; expidx = mpz_size (EXP) - 1; /* point at most significant limb */ expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */ bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1); /* case EXP=0 */ if (mpz_sgn (EXP) == 0) { mpres_set_ui (R, 1UL, modulus); /* set result to 1 */ ASSERT_NORMALIZED (R); return; } ASSERT (mpz_size (EXP) > 0); /* probably redundant with _sgn() test */ expidx = mpz_size (EXP) - 1; /* point at most significant limb */ expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */ ASSERT (expbits != 0); /* Scan for the MSB in expbits */ bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1); for (; (bitmask & expbits) == 0; bitmask >>= 1); /* here the most significant limb with any set bits is in expbits, */ /* bitmask is set to mask in the msb of expbits */ mpz_set_ui (modulus->temp2, BASE); /* temp2 = BASE */ if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_mul_2exp (modulus->temp1, modulus->temp2, modulus->bits); mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus); } bitmask >>= 1; while (1) { for ( ; bitmask != 0; bitmask >>= 1) { mpres_pow_sqr (modulus->temp2, modulus->temp2, modulus); /* If bit is 1, set temp2 = temp2 * BASE */ if (expbits & bitmask) { if (BASE == 2UL) { mpz_mul_2exp (modulus->temp2, modulus->temp2, 1); if (mpz_cmp (modulus->temp2, modulus->orig_modulus) >= 0) mpz_sub (modulus->temp2, modulus->temp2, modulus->orig_modulus); } else { mpz_mul_ui (modulus->temp1, modulus->temp2, BASE); mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus); } } } if (expidx == 0) /* if we just processed the least */ break; /* significant limb, we are done */ expidx--; expbits = mpz_getlimbn (EXP, expidx); bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1); } mpz_set (R, modulus->temp2); /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|. If EXP was negative, do a modular inverse */ if (mpz_sgn (EXP) < 0) { mpres_invert (R, R, modulus); } } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */ ASSERT_NORMALIZED (R); } /* We use here the algorithm described in "Fast Modular Reduction" from Hasenplaugh, Gaubatz and Gobal, Arith'18, 2007: assuming N has n limbs, we have precomputed C = B^(n + ceil(n/2)) mod N. */ static void mpres_mpz_mod (mpres_t R, mpz_t T, mpz_t N, mpz_t C) { size_t n = mpz_size (N); size_t t = mpz_size (T); size_t m = n + (n + 1) / 2; /* n + ceil(n/2) */ if (t > m && n > 1) /* if n=1, then m=2, thus h=0 */ { size_t c = mpz_size (C); size_t h, l; mp_ptr rp; mp_ptr tp = PTR(T); /* Warning: we might have t > 2n. In that case we reduce {tp+l+m, t-(m+l)} where l = t-2n. */ l = (t > 2 * n) ? t - 2 * n : 0; tp += l; h = t - (m + l); /* since t-l <= 2n and m = n + ceil(n/2), we have h <= n - ceil(n/2) = floor(n/2). On the other hand, if l=0 we have h = t-m > 0; if l>0, then l=t-2n, thus h=2n-m = floor(n/2) > 0 since n > 1. */ mpz_realloc (R, c + h); rp = PTR(R); if (c > h) mpn_mul (rp, PTR(C), c, tp + m, h); else mpn_mul (rp, tp + m, h, PTR(C), c); /* now add {rp, c+h} to {tp, m}: we have c <= n and h <= n/2, thus c + h <= m */ if (c + h > m) abort(); tp[m] = mpn_add (tp, tp, m, rp, c + h); m += l + tp[m]; tp -= l; /* put back the low l limbs */ MPN_NORMALIZE(tp, m); SIZ(T) = (SIZ(T) > 0) ? m : -m; } mpz_mod (R, T, N); } void mpres_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); ASSERT_NORMALIZED (S2); #ifdef WANT_ASSERT_EXPENSIVE mpz_t test1, test2, test_result1, test_result2; ASSERT_ALWAYS (S1 != modulus->temp1 && S2 != modulus->temp1 && R != modulus->temp1); mpz_init (test1); mpz_init (test2); mpz_init (test_result1); mpz_init (test_result2); mpres_get_z (test1, S1, modulus); mpres_get_z (test2, S2, modulus); mpz_mul (test_result1, test1, test2); mpz_mod (test_result1, test_result1, modulus->orig_modulus); #endif if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768)) { mp_size_t n = modulus->Fermat / GMP_NUMB_BITS; unsigned long k; mp_srcptr s1p, s2p; mp_size_t s1s, s2s; MPZ_REALLOC (R, n + 1); s1p = PTR(S1); s1s = SIZ(S1); s2p = PTR(S2); s2s = SIZ(S2); k = mpn_fft_best_k (n, S1 == S2); ASSERT(mpn_fft_next_size (n, k) == n); if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus)) { s1p = PTR(modulus->temp1); s1s = SIZ(modulus->temp1); } if (S1 == S2) { s2p = s1p; s2s = s1s; } else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus)) { s2p = PTR(modulus->temp2); s2s = SIZ(modulus->temp2); } /* mpn_mul_fft() computes the product modulo B^n + 1, where B = 2^(machine word size in bits). So the result can be = B^n, in that case R is set to zero and 1 is returned as carry-out. In all other cases 0 is returned. Hence the complete result is R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */ PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k); n ++; MPN_NORMALIZE(PTR(R), n); SIZ(R) = ((s1s ^ s2s) >= 0) ? (int) n : (int) -n; return; } switch (modulus->repr) { case ECM_MOD_BASE2: mpz_mul (modulus->temp1, S1, S2); base2mod (R, modulus->temp1, modulus->temp1, modulus); break; case ECM_MOD_MODMULN: MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_mulredc_basecase (R, S1, S2, modulus); break; case ECM_MOD_REDC: mpz_mul (modulus->temp1, S1, S2); REDC (R, modulus->temp1, modulus->temp2, modulus); break; default: /* case ECM_MOD_MPZ */ mpz_mul (modulus->temp1, S1, S2); mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus, modulus->aux_modulus); break; } ASSERT_NORMALIZED (R); #ifdef WANT_ASSERT_EXPENSIVE mpres_get_z (test_result2, R, modulus); if (mpz_cmp (test_result1, test_result2) != 0) { printf ("mpres_mul and mpz_mul/mpz_mod produced different results.\n"); gmp_printf ("input 1: %Zd\n", test1); gmp_printf ("input 2: %Zd\n", test2); gmp_printf ("mpres_mul: %Zd\n", test_result2); gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1); abort (); } mpz_clear (test1); mpz_clear (test2); mpz_clear (test_result1); mpz_clear (test_result2); #endif } /* R <- S1^2 mod modulus */ void mpres_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus) { ASSERT_NORMALIZED (S1); #ifdef WANT_ASSERT_EXPENSIVE mpz_t test1, test_result1, test_result2; ASSERT_ALWAYS (S1 != modulus->temp1 && R != modulus->temp1); mpz_init (test1); mpz_init (test_result1); mpz_init (test_result2); mpres_get_z (test1, S1, modulus); mpz_mul (test_result1, test1, test1); mpz_mod (test_result1, test_result1, modulus->orig_modulus); #endif if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768)) { mpres_mul (R, S1, S1, modulus); return; } switch (modulus->repr) { case ECM_MOD_BASE2: mpz_mul (modulus->temp1, S1, S1); base2mod (R, modulus->temp1, modulus->temp1, modulus); break; case ECM_MOD_MODMULN: MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_sqrredc_basecase (R, S1, modulus); break; case ECM_MOD_REDC: mpz_mul (modulus->temp1, S1, S1); REDC (R, modulus->temp1, modulus->temp2, modulus); break; default: /* case ECM_MOD_MPZ */ mpz_mul (modulus->temp1, S1, S1); mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus, modulus->aux_modulus); break; } ASSERT_NORMALIZED (R); #ifdef WANT_ASSERT_EXPENSIVE mpres_get_z (test_result2, R, modulus); if (mpz_cmp (test_result1, test_result2) != 0) { printf ("mpres_sqr and mpz_mul/mpz_mod produced different results.\n"); gmp_printf ("input 1: %Zd\n", test1); gmp_printf ("mpres_mul: %Zd\n", test_result2); gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1); abort (); } mpz_clear (test1); mpz_clear (test_result1); mpz_clear (test_result2); #endif } /* R <- S * n mod modulus */ void mpres_mul_ui (mpres_t R, const mpres_t S, const unsigned long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); mpz_mul_ui (modulus->temp1, S, n); /* This is the same for all methods: just reduce with original modulus */ mpz_mod (R, modulus->temp1, modulus->orig_modulus); ASSERT_NORMALIZED (R); } /* R <- S * 2^k mod modulus */ void mpres_mul_2exp (mpres_t R, const mpres_t S, const unsigned long k, mpmod_t modulus) { ASSERT_NORMALIZED (S); mpz_mul_2exp (modulus->temp1, S, k); /* This is the same for all methods: just reduce with original modulus */ mpz_mod (R, modulus->temp1, modulus->orig_modulus); ASSERT_NORMALIZED (R); } /* This function multiplies an integer in mpres_t form with an integer in mpz_t form, and stores the output in mpz_t form. The advantage is that one REDC suffices to reduce the product and convert it to non-Montgomery representation. */ void mpres_mul_z_to_z (mpz_t R, const mpres_t S1, const mpz_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768) { mp_size_t n = modulus->Fermat / GMP_NUMB_BITS; unsigned long k; mp_srcptr s1p = PTR(S1), s2p = PTR(S2); mp_size_t s1s = SIZ(S1), s2s = SIZ(S2); MPZ_REALLOC (R, n + 1); k = mpn_fft_best_k (n, S1 == S2); ASSERT(mpn_fft_next_size (n, k) == n); if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus)) { s1p = PTR(modulus->temp1); s1s = SIZ(modulus->temp1); } if (S1 == S2) { s2p = s1p; s2s = s1s; } else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus)) { s2p = PTR(modulus->temp2); s2s = SIZ(modulus->temp2); } /* mpn_mul_fft() computes the product modulo B^n + 1, where B = 2^(machine word size in bits). So the result can be = B^n, in that case R is set to zero and 1 is returned as carry-out. In all other cases 0 is returned. Hence the complete result is R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */ PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k); n ++; MPN_NORMALIZE(PTR(R), n); SIZ(R) = ((s1s ^ s2s) >= 0) ? (int) n : (int) -n; mpz_mod (R, R, modulus->orig_modulus); return; } switch (modulus->repr) { case ECM_MOD_BASE2: if (mpz_sizeinbase (S2, 2) > (unsigned) abs (modulus->bits)) { base2mod (modulus->temp2, S2, modulus->temp1, modulus); mpz_mul (modulus->temp1, S1, modulus->temp2); } else mpz_mul (modulus->temp1, S1, S2); base2mod (R, modulus->temp1, modulus->temp1, modulus); mpz_mod (R, R, modulus->orig_modulus); break; case ECM_MOD_MODMULN: if (mpz_cmp (S2, modulus->orig_modulus) >= 0) { mpz_mod (modulus->temp2, S2, modulus->orig_modulus); MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_mulredc_basecase (R, S1, modulus->temp2, modulus); mpz_mod (R, R, modulus->orig_modulus); } else { MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); if (ABSIZ(S2) < modulus->bits / GMP_NUMB_BITS) { mpz_t t; mpz_init2 (t, modulus->bits); mpz_set (t, S2); ecm_mulredc_basecase (R, S1, t, modulus); mpz_clear (t); } else ecm_mulredc_basecase (R, S1, S2, modulus); mpz_mod (R, R, modulus->orig_modulus); } break; case ECM_MOD_REDC: if (mpz_cmp (S2, modulus->orig_modulus) >= 0) { mpz_mod (modulus->temp2, S2, modulus->orig_modulus); mpz_mul (modulus->temp1, S1, modulus->temp2); } else mpz_mul (modulus->temp1, S1, S2); REDC (R, modulus->temp1, modulus->temp2, modulus); mpz_mod (R, R, modulus->orig_modulus); break; default: if (mpz_cmp (S2, modulus->orig_modulus) >= 0) { mpz_mod (modulus->temp2, S2, modulus->orig_modulus); mpz_mul (modulus->temp1, S1, modulus->temp2); } else mpz_mul (modulus->temp1, S1, S2); mpz_mod (R, modulus->temp1, modulus->orig_modulus); break; } ASSERT_NORMALIZED (R); } /* Sets R = S * c, for some constant c that is coprime to modulus. This is primarily useful for multiplying numbers together for a gcd with modulus. The advantage is that we don't need to convert the mpz_t to Montgomery representation before applying REDC. */ void mpres_set_z_for_gcd (mpres_t R, const mpz_t S, mpmod_t modulus) { mpz_mod (R, S, modulus->orig_modulus); ASSERT_NORMALIZED (R); } /* Companion function to mpres_set_z_for_gcd(). It divides by c^n. In case of MULREDC with k b-bit words, c = 1/2^(b*k), so we multiply by 2^(n*b*k). The purpose is to fix products of n terms collected by using mpres_set_z_for_gcd(), so that we can still get the exact residue. */ void mpres_set_z_for_gcd_fix (mpres_t R, const mpres_t S, const mpz_t n, mpmod_t modulus) { switch (modulus->repr) { case ECM_MOD_MODMULN: case ECM_MOD_REDC: { mpres_t po2; mpz_t nb; mpz_init (nb); mpres_init (po2, modulus); /* Note: modulus->bits is always non-negative for MODMULN and REDC */ mpz_mul_ui (nb, n, modulus->bits); mpres_set_ui (po2, 2, modulus); mpres_pow (po2, po2, nb, modulus); mpres_mul (R, S, po2, modulus); mpz_clear (nb); mpres_clear (po2, modulus); } } } /* R <- S / 2^n mod modulus. Does not need to be fast. */ void mpres_div_2exp (mpres_t R, const mpres_t S, const unsigned int n, mpmod_t modulus) { int i; ASSERT_NORMALIZED (S); if (n == 0) { mpres_set (R, S, modulus); ASSERT_NORMALIZED (R); return; } if (mpz_odd_p (S)) { ASSERT (mpz_odd_p (modulus->orig_modulus)); mpz_add (R, S, modulus->orig_modulus); mpz_tdiv_q_2exp (R, R, 1); } else mpz_tdiv_q_2exp (R, S, 1); for (i = n ; i > 1; i--) { if (mpz_odd_p (R)) { ASSERT (mpz_odd_p (modulus->orig_modulus)); mpz_add (R, R, modulus->orig_modulus); } mpz_tdiv_q_2exp (R, R, 1); } ASSERT_NORMALIZED (R); } void mpres_add_ui (mpres_t R, const mpres_t S, const unsigned long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_add_ui (R, S, n); if (mpz_cmp (R, modulus->orig_modulus) > 0) mpz_sub (R, R, modulus->orig_modulus); /* This assumes modulus >= n */ } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_add (modulus->temp1, modulus->temp1, S); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* R <- S1 + S2 mod modulus */ void mpres_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); ASSERT_NORMALIZED (S2); mpz_add (R, S1, S2); if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) && ABSIZ(R) > ABSIZ(modulus->orig_modulus)) { if (SIZ(R) > 0) mpz_sub (R, R, modulus->multiple); else mpz_add (R, R, modulus->multiple); /* N <= since multiple < 2^Nbits + N, now |R| < B */ } ASSERT_NORMALIZED (R); } /* R <- S - n mod modulus If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to Montgomery representation before substracting */ void mpres_sub_ui (mpres_t R, const mpres_t S, const unsigned long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_sub_ui (R, S, n); if (mpz_sgn (R) < 0) mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */ } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_sub (modulus->temp1, S, modulus->temp1); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } #if 0 /* R <- n - S mod modulus If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to Montgomery representation before substracting */ void mpres_ui_sub (mpres_t R, const unsigned long n ,const mpres_t S, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_ui_sub (R, n, S); if (mpz_sgn (R) < 0) mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */ } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_sub (modulus->temp1, modulus->temp1, S); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } #endif /* R <- S1 - S2 mod modulus */ void mpres_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); ASSERT_NORMALIZED (S2); mpz_sub (R, S1, S2); if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) && ABSIZ(R) > ABSIZ(modulus->orig_modulus)) { if (SIZ(R) > 0) mpz_sub (R, R, modulus->multiple); else mpz_add (R, R, modulus->multiple); /* N <= since multiple < 2^Nbits + N, now |R| < B */ } ASSERT_NORMALIZED (R); } void mpres_set_z (mpres_t R, const mpz_t S, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) mpz_mod (R, S, modulus->orig_modulus); else if (modulus->repr == ECM_MOD_MODMULN) { mpz_mod (modulus->temp2, S, modulus->orig_modulus); ecm_mulredc_basecase (R, modulus->temp2, modulus->R2, modulus); } else if (modulus->repr == ECM_MOD_REDC) { mpz_mod (modulus->temp2, S, modulus->orig_modulus); mpz_mul (modulus->temp1, modulus->temp2, modulus->R2); REDC (R, modulus->temp1, modulus->temp2, modulus); } ASSERT_NORMALIZED (R); } /* R and S must not be modulus->temp1 */ void mpres_get_z (mpz_t R, const mpres_t S, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_mod (R, S, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_MODMULN) { mpz_set (modulus->temp1, S); MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_redc_basecase (R, modulus->temp1, modulus); mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */ } else if (modulus->repr == ECM_MOD_REDC) { REDC (R, S, modulus->temp1, modulus); mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */ } #ifdef DEBUG else { fprintf (ECM_STDERR, "mpres_get_z: Unexpected representation %d\n", modulus->repr); exit (EXIT_FAILURE); } #endif } /* R <- n mod modulus If repr==ECM_MOD_MPZ or ECM_MOD_BASE2, we convert n to Montgomery representation */ void mpres_set_ui (mpres_t R, const unsigned long n, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_set_ui (R, n); mpz_mod (R, R, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* same as previous but with signed long */ void mpres_set_si (mpres_t R, const long n, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_set_si (R, n); mpz_mod (R, R, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_si (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* R <- -S mod modulus. Does not need to be efficient. */ void mpres_neg (mpres_t R, const mpres_t S, ATTRIBUTE_UNUSED mpmod_t modulus) { ASSERT_NORMALIZED (S); mpz_neg (R, S); ASSERT_NORMALIZED (R); } /* Returns non-zero if inversion succeeded, and zero if not */ int mpres_invert (mpres_t R, const mpres_t S, mpmod_t modulus) { #ifdef WANT_ASSERT_EXPENSIVE mpres_t test; mpz_t test_result; mpres_init (test, modulus); mpres_set (test, S, modulus); #endif ASSERT_NORMALIZED (S); if (mpz_invert (modulus->temp2, S, modulus->orig_modulus) == 0) return 0; if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_set (R, modulus->temp2); ASSERT_NORMALIZED (R); } else if (modulus->repr == ECM_MOD_MODMULN) { ecm_mulredc_basecase (R, modulus->temp2, modulus->R3, modulus); ASSERT_NORMALIZED (R); } else if (modulus->repr == ECM_MOD_REDC) { MPZ_NORMALIZED (S); mpz_mul (modulus->temp1, modulus->temp2, modulus->R3); REDC (R, modulus->temp1, modulus->temp2, modulus); ASSERT_NORMALIZED (R); } #ifdef DEBUG else { fprintf (ECM_STDERR, "mpres_invert: Unexpected representation %d\n", modulus->repr); exit (EXIT_FAILURE); } #endif #ifdef WANT_ASSERT_EXPENSIVE mpres_mul (test, test, R, modulus); mpz_init (test_result); mpres_get_z (test_result, test, modulus); ASSERT_ALWAYS(mpz_cmp_ui (test_result, 1UL) == 0); mpz_clear (test_result); mpres_clear (test, modulus); #endif return 1; } void mpres_gcd (mpz_t R, const mpres_t S, const mpmod_t modulus) { /* In MODMULN and REDC form, M(x) = x*R with gcd(R, modulus) = 1 . Therefore gcd(M(x), modulus) = gcd(x, modulus) and we need not bother to convert out of Montgomery form. */ ASSERT_NORMALIZED (S); mpz_gcd (R, S, modulus->orig_modulus); } #ifdef HAVE_ADDLAWS /* Returns non-zero if the two residues are equal, and zero if they are not */ int mpres_equal (const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mpz_mod (modulus->temp1, S1, modulus->orig_modulus); mpz_mod (modulus->temp2, S2, modulus->orig_modulus); return (mpz_cmp (modulus->temp1, modulus->temp2) == 0); } #endif #if 0 /* those routines are not called in normal operation */ void mpres_out_str (FILE *fd, const unsigned int base, const mpres_t S, mpmod_t modulus) { mpres_get_z (modulus->temp2, S, modulus); mpz_out_str (fd, base, modulus->temp2); } /* Multiplies S1 by the one-limb integer S2, and does modulo reduction. The modulo reduction may imply multiplication of the residue class by some constant, since we may not do the correct number of REDC reduction passes and so fail to divide by the correct power of 2 for Montgomery representation. The constant is the same for each call of this function with a given modulus, however. */ static void ecm_mulredc_1_basecase (mpres_t R, const mpres_t S1, const mp_limb_t S2, mpmod_t modulus) { mp_ptr s1p; mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ALLOC(R) >= nn); ASSERT(ALLOC(S1) >= nn); s1p = PTR(S1); for (j = ABSIZ(S1); j < nn; j++) s1p[j] = 0; #ifdef HAVE_NATIVE_MULREDC1_N if (nn < 20) { mp_ptr rp = PTR(R); mulredc_1(rp, S2, s1p, PTR(modulus->orig_modulus), nn, modulus->Nprim[0]); MPN_NORMALIZE (rp, nn); SIZ(R) = (SIZ(S1)) < 0 ? (int) -nn : (int) nn; } else #endif { /* FIXME, we can do much better than this */ mpz_mul_ui (modulus->temp1, S1, S2); mpz_mod(R, modulus->temp1, modulus->orig_modulus); } } /* Multiplies S by n and possibly divides by some constant. Whether or not it divides depends on the modulus representation and the modulus size. */ void mpres_muldivbysomething_si (mpres_t R, const mpres_t S, const long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MODMULN && modulus->bits / GMP_NUMB_BITS <= 20) /* FIXME: is the 20 here the same constant as in mulredc1_20? If so, it should be changed into a macro. */ { MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); if (n < 0) { ecm_mulredc_1_basecase (R, S, (mp_limb_t) -n, modulus); mpres_neg (R, R, modulus); } else { ecm_mulredc_1_basecase (R, S, (mp_limb_t) n, modulus); } } else { mpz_mul_si (modulus->temp1, S, n); /* This is the same for all methods: just reduce with original modulus */ mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* Returns 1 if successful, 0 if not */ static int test_mpres_set_z_for_gcd_fix (const int maxk, mpmod_t modulus) { mpres_t m, prod; mpz_t n; int k; mpres_init (prod, modulus); mpres_init (m, modulus); mpz_init (n); /* m = 1 * c, where c is a constant depending on mod reduction method */ mpz_set_ui (n, 1); mpres_set_z_for_gcd (m, n, modulus); for (k = 0; k <= maxk; k++) { int i; mpres_set_ui (prod, 1, modulus); /* Compute prod = 1 * (1 * c)^k */ for (i = 0; i < k; i++) mpres_mul (prod, prod, m, modulus); /* Divide prod by c^k */ mpz_set_ui (n, k); mpres_set_z_for_gcd_fix (prod, prod, n, modulus); /* Result should be 1 */ mpres_get_z (n, prod, modulus); if (mpz_cmp_ui (n, 1) != 0) { gmp_printf ("Error: test of mpres_set_z_for_gcd_fix() failed, k = %d, n = %Zd\n", k, n); return 0; } } mpz_clear (n); mpres_clear (m, modulus); mpres_clear (prod, modulus); return 1; } int mpmod_selftest (const mpz_t n) { mpres_t test1, test2; mpmod_t modulus; printf ("Performing self test of modular arithmetic\n"); mpmod_init (modulus, n, 0); mpres_init (test1, modulus); mpres_init (test2, modulus); mpres_set_ui (test1, 2, modulus); mpres_set_ui (test2, 5, modulus); mpres_muldivbysomething_si (test1, test1, 5, modulus); mpres_muldivbysomething_si (test2, test2, 2, modulus); if (!mpres_equal (test1, test2, modulus)) { printf ("mpres_muldivbysomething_si() wrong\n"); fflush (stdout); abort(); } mpres_clear (test1, modulus); mpres_clear (test2, modulus); if (!test_mpres_set_z_for_gcd_fix(10, modulus)) abort(); mpmod_clear (modulus); return 0; } #endif /* #if 0 */ /****************************************************/ /* mpresn: modular arithmetic based directly on mpn */ /****************************************************/ /* We use here a signed word-based redundant representation. In case N < B^n/16 (since for redc where we add to the absolute value of the residue), where n is the number of limbs of N in base B (2^32 or 2^64 usually), we can prove there is no adjustment (adding or subtracting N), cf http://www.loria.fr/~zimmerma/papers/norm.pdf. However current branch predictors are quite good, thus we prefer to keep the tests and to allow any input N (instead of only N < B^n/16). */ /* ensure R has allocated space for at least n limbs, and if less than n limbs are used, pad with zeros, and set SIZ(R) to n if positive or -n if negative */ void mpresn_pad (mpres_t R, mpmod_t N) { mp_size_t n = ABSIZ(N->orig_modulus); mp_size_t rn; _mpz_realloc (R, n); rn = mpz_size (R); ASSERT_ALWAYS (rn <= n); if (rn < n) { MPN_ZERO (PTR(R) + rn, n - rn); SIZ(R) = SIZ(R) >= 0 ? n : -n; } } void mpresn_unpad (mpres_t R) { mp_size_t n = ABSIZ(R); while (n > 0 && PTR(R)[n-1] == 0) n--; SIZ(R) = SIZ(R) >= 0 ? n : -n; } /* R <- S1 * S1 mod N, used only for ECM_MOD_MODMULN */ void mpresn_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus) { mp_size_t n = ABSIZ(modulus->orig_modulus); ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ecm_sqrredc_basecase_n (PTR(R), PTR(S1), PTR(modulus->orig_modulus), n, modulus->Nprim, PTR(modulus->temp1)); SIZ(R) = n; } /* R <- S1 * S2 mod N, used only for ECM_MOD_MODMULN */ void mpresn_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_size_t n = ABSIZ(modulus->orig_modulus); ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); ecm_mulredc_basecase_n (PTR(R), PTR(S1), PTR(S2), PTR(modulus->orig_modulus), n, modulus->Nprim, PTR(modulus->temp1)); SIZ(R) = SIZ(S1) == SIZ(S2) ? n : -n; } /* R <- S*m/B mod modulus where m fits in a mp_limb_t. Here S (w in dup_add_batch1) is the result of a subtraction, thus with the notations from http://www.loria.fr/~zimmerma/papers/norm.pdf we have S < 2 \alpha N. Then R < (2 \alpha N \beta + \beta N) = (2 \alpha + 1) N. This result R is used in an addition with u being the result of a squaring thus u < \alpha N, which gives a result < (3 \alpha + 1) N. Finally this result is used in a multiplication with another operand less than 2 \alpha N, thus we want: ((2 \alpha) (3 \alpha + 1) N^2 + \beta N)/\beta \leq \alpha N, i.e., 2 \alpha (3 \alpha + 1) \varepsilon + 1 \leq \alpha This implies \varepsilon \leq 7/2 - sqrt(3)/2 ~ 0.0359, in which case we can take \alpha = 2/3*sqrt(3)+1 ~ 2.1547. In that case no adjustment is needed in mpresn_mul_1. However we prefer to keep the adjustment here, to allow a larger set of inputs (\varepsilon \leq 1/16 = 0.0625 instead of 0.0359). */ void mpresn_mul_1 (mpres_t R, const mpres_t S, const mp_limb_t m, mpmod_t modulus) { mp_ptr t1 = PTR(modulus->temp1); mp_ptr t2 = PTR(modulus->temp2); mp_size_t n = ABSIZ(modulus->orig_modulus); mp_limb_t q; ASSERT (SIZ(S) == n || -SIZ(S) == n); ASSERT (ALLOC(modulus->temp1) >= n+1); #if defined(USE_ASM_REDC) && defined(HAVE_NATIVE_MULREDC1_N) if (n <= MULREDC_ASSEMBLY_MAX) mulredc_1 (PTR(R), m, PTR(S), PTR(modulus->orig_modulus), n, modulus->Nprim[0]); else #endif { t1[n] = mpn_mul_1 (t1, PTR(S), n, m); q = t1[0] * modulus->Nprim[0]; t2[n] = mpn_mul_1 (t2, PTR(modulus->orig_modulus), n, q); // Disabled to compile with GMP 6.3 // #ifdef HAVE___GMPN_ADD_NC // q = __gmpn_add_nc (PTR(R), t1 + 1, t2 + 1, n, t1[0] != 0); // #else q = mpn_add_n (PTR(R), t1 + 1, t2 + 1, n); q += mpn_add_1 (PTR(R), PTR(R), n, t1[0] != 0); // #endif while (q != 0) q -= mpn_sub_n (PTR(R), PTR(R), PTR(modulus->orig_modulus), n); } SIZ(R) = SIZ(S); /* sign is unchanged */ } /* R <- S1 + S2 mod modulus */ /* we assume all numbers are allocated to n limbs, and unused most significant limbs are set to zero */ void mpresn_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr r = PTR(R); mp_ptr s1 = PTR(S1); mp_ptr s2 = PTR(S2); mp_size_t n = ABSIZ(modulus->orig_modulus); ATTRIBUTE_UNUSED mp_limb_t cy; ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */ { cy = mpn_add_n (r, s1, s2, n); /* for N < B^n/16, the while loop will be never performed, which proves it will be performed a small number of times. In practice we observed up to 7 loops, but it happens rarely. */ #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n); #endif SIZ(R) = SIZ(S1); } else /* different signs */ { if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (r, s1, s2, n); /* no borrow here */ SIZ(R) = SIZ(S1); } else { mpn_sub_n (r, s2, s1, n); /* idem */ SIZ(R) = SIZ(S2); } } } void mpresn_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr r = PTR(R); mp_ptr s1 = PTR(S1); mp_ptr s2 = PTR(S2); mp_size_t n = ABSIZ(modulus->orig_modulus); ATTRIBUTE_UNUSED mp_limb_t cy; ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); if (SIZ(S1) != SIZ(S2)) /* S1 and S2 are of different signs */ { cy = mpn_add_n (r, s1, s2, n); #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n); #endif SIZ(R) = SIZ(S1); } else /* same signs, it's a real subtraction */ { if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (r, s1, s2, n); /* no borrow here */ SIZ(R) = SIZ(S1); } else { mpn_sub_n (r, s2, s1, n); /* idem */ SIZ(R) = -SIZ(S2); } } } /* (R, T) <- (S1 + S2, S1 - S2) Assume R differs from both S1 and S2. */ void mpresn_addsub (mpres_t R, mpres_t T, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr r = PTR(R); mp_ptr t = PTR(T); mp_ptr s1 = PTR(S1); mp_ptr s2 = PTR(S2); mp_size_t n = ABSIZ(modulus->orig_modulus); ATTRIBUTE_UNUSED mp_limb_t cy; ASSERT (R != S1); ASSERT (R != S2); ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */ { cy = mpn_add_n (r, s1, s2, n); #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n); #endif SIZ(R) = SIZ(S1); if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (t, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */ SIZ(T) = SIZ(S1); } else { mpn_sub_n (t, s2, s1, n); /* idem since {s2,n} >= {s1,n} */ SIZ(T) = -SIZ(S2); } } else /* different signs */ { if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (r, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */ SIZ(R) = SIZ(S1); } else { mpn_sub_n (r, s2, s1, n); /* idem since {s2,n} >= {s1,n} */ SIZ(R) = SIZ(S2); } cy = mpn_add_n (t, s1, s2, n); #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (t, t, PTR(modulus->orig_modulus), n); #endif SIZ(T) = SIZ(S1); } }