/* Elliptic Curve Method: toplevel and stage 1 routines. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2016, 2024 Paul Zimmermann, Alexander Kruppa, Cyril Bouvier, David Cleaver, Philip McLaughlin. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include "ecm-impl.h" #include "getprime_r.h" #include #ifdef HAVE_ADDLAWS #include "addlaws.h" #endif #ifdef HAVE_LIMITS_H # include #else # define ULONG_MAX __GMP_ULONG_MAX #endif #ifdef TIMING_CRT extern int mpzspv_from_mpzv_slow_time, mpzspv_to_mpzv_time, mpzspv_normalise_time; #endif /* the following factor takes into account the smaller expected smoothness for Montgomery's curves (batch mode) with respect to Suyama's curves */ /* For param 1 we use A=4d-2 with d a square (see main.c). In that case, Cyril Bouvier and Razvan Barbulescu have shown that the average expected torsion is that of a generic Suyama curve multiplied by the constant 2^(1/3)/(3*3^(1/128)) */ #define EXTRA_SMOOTHNESS_SQUARE 0.416384512396064 /* For A=4d-2 (param 3) for d a random integer, the average expected torsion is that of a generic Suyama curve multiplied by the constant 1/(3*3^(1/128)) */ #define EXTRA_SMOOTHNESS_32BITS_D 0.330484606500389 /****************************************************************************** * * * Elliptic Curve Method * * * ******************************************************************************/ void duplicate (mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t, mpres_t) ATTRIBUTE_HOT; void add3 (mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t) ATTRIBUTE_HOT; #define mpz_mulmod5(r,s1,s2,m,t) { mpz_mul(t,s1,s2); mpz_mod(r, t, m); } /* PBMcL additions for Lucas chain codes */ /* 3-bit chain start sequences. Covers all possible Lucas chains */ #define CHAIN_START_5_8_13 0x7 #define CHAIN_START_5_8_11 0x6 #define CHAIN_START_5_8_10 0x5 #define CHAIN_START_5_7 0x4 #define CHAIN_START_5_6 0x3 #define CHAIN_START_4_7 0x2 #define CHAIN_START_4_5 0x1 #define CHAIN_START_4_6 0x0 /* precludes a completely zero code */ typedef struct { /* note: "parent" is the immediately previous chain element */ uint64_t value; /* integer value of this chain element */ uint8_t comp_offset_1; /* larger summand (summand_1) component index counting back from parent (parent = 0) */ uint8_t comp_offset_2; /* smaller summand (summand_2) component index counting back from parent */ uint8_t dif_offset; /* component index of (summand_1 - summand_2) counting back from parent */ /* note: dif_offset = 0 will indicate that this is a doubled element */ } chain_element; /* prototypes */ static void max_continuation( chain_element *, uint8_t *, uint8_t ); static uint8_t generate_Lucas_chain( uint64_t, uint64_t, chain_element * ); /* end PBMcL additions */ /* switch from Montgomery's form g*y^2 = x^3 + a*x^2 + x to Weierstrass' form Y^2 = X^3 + A*X + B by change of variables x -> g*X-a/3, y -> g*Y. We have A = (3-a^2)/(3g^2), X = (3x+a)/(3g), Y = y/g. If a factor is found during the modular inverse, returns ECM_FACTOR_FOUND_STEP1 and the factor in f, otherwise returns ECM_NO_FACTOR_FOUND. The input value of y is the y0 value in the initial equation: g*y0^2 = x0^3 + a*x0^2 + x0. */ int montgomery_to_weierstrass (mpz_t f, mpres_t x, mpres_t y, mpres_t A, mpmod_t n) { mpres_t g; mpres_init (g, n); mpres_add (g, x, A, n); mpres_mul (g, g, x, n); mpres_add_ui (g, g, 1, n); mpres_mul (g, g, x, n); /* g = x^3+a*x^2+x (y=1) */ mpres_mul_ui (y, g, 3, n); mpres_mul (y, y, g, n); /* y = 3g^2 */ if (!mpres_invert (y, y, n)) /* y = 1/(3g^2) temporarily */ { mpres_gcd (f, y, n); mpres_clear (g, n); return ECM_FACTOR_FOUND_STEP1; } /* update x */ mpres_mul_ui (x, x, 3, n); /* 3x */ mpres_add (x, x, A, n); /* 3x+a */ mpres_mul (x, x, g, n); /* (3x+a)*g */ mpres_mul (x, x, y, n); /* (3x+a)/(3g) */ /* update A */ mpres_sqr (A, A, n); /* a^2 */ mpres_sub_ui (A, A, 3, n); mpres_neg (A, A, n); /* 3-a^2 */ mpres_mul (A, A, y, n); /* (3-a^2)/(3g^2) */ /* update y */ mpres_mul_ui (g, g, 3, n); /* 3g */ mpres_mul (y, y, g, n); /* (3g)/(3g^2) = 1/g */ mpres_clear (g, n); return ECM_NO_FACTOR_FOUND; } /* adds Q=(x2:z2) and R=(x1:z1) and puts the result in (x3:z3), using 6 muls (4 muls and 2 squares), and 6 add/sub. One assumes that Q-R=P or R-Q=P where P=(x:z). - n : number to factor - u, v, w : auxiliary variables Modifies: x3, z3, u, v, w. (x3,z3) may be identical to (x2,z2) and to (x,z) */ void add3 (mpres_t x3, mpres_t z3, mpres_t x2, mpres_t z2, mpres_t x1, mpres_t z1, mpres_t x, mpres_t z, mpmod_t n, mpres_t u, mpres_t v, mpres_t w) { mpres_sub (u, x2, z2, n); mpres_add (v, x1, z1, n); /* u = x2-z2, v = x1+z1 */ mpres_mul (u, u, v, n); /* u = (x2-z2)*(x1+z1) */ mpres_add (w, x2, z2, n); mpres_sub (v, x1, z1, n); /* w = x2+z2, v = x1-z1 */ mpres_mul (v, w, v, n); /* v = (x2+z2)*(x1-z1) */ mpres_add (w, u, v, n); /* w = 2*(x1*x2-z1*z2) */ mpres_sub (v, u, v, n); /* v = 2*(x2*z1-x1*z2) */ mpres_sqr (w, w, n); /* w = 4*(x1*x2-z1*z2)^2 */ mpres_sqr (v, v, n); /* v = 4*(x2*z1-x1*z2)^2 */ if (x == x3) /* same variable: in-place variant */ { /* x3 <- w * z mod n z3 <- x * v mod n */ mpres_mul (z3, w, z, n); mpres_mul (x3, x, v, n); mpres_swap (x3, z3, n); } else { mpres_mul (x3, w, z, n); /* x3 = 4*z*(x1*x2-z1*z2)^2 mod n */ mpres_mul (z3, x, v, n); /* z3 = 4*x*(x2*z1-x1*z2)^2 mod n */ } /* mul += 6; */ } /* computes 2P=(x2:z2) from P=(x1:z1), with 5 muls (3 muls and 2 squares) and 4 add/sub. - n : number to factor - b : (a+2)/4 mod n - t, u, v, w : auxiliary variables */ void duplicate (mpres_t x2, mpres_t z2, mpres_t x1, mpres_t z1, mpmod_t n, mpres_t b, mpres_t u, mpres_t v, mpres_t w) { mpres_add (u, x1, z1, n); mpres_sqr (u, u, n); /* u = (x1+z1)^2 mod n */ mpres_sub (v, x1, z1, n); mpres_sqr (v, v, n); /* v = (x1-z1)^2 mod n */ mpres_mul (x2, u, v, n); /* x2 = u*v = (x1^2 - z1^2)^2 mod n */ mpres_sub (w, u, v, n); /* w = u-v = 4*x1*z1 */ mpres_mul (u, w, b, n); /* u = w*b = ((A+2)/4*(4*x1*z1)) mod n */ mpres_add (u, u, v, n); /* u = (x1-z1)^2+(A+2)/4*(4*x1*z1) */ mpres_mul (z2, w, u, n); /* z2 = ((4*x1*z1)*((x1-z1)^2+(A+2)/4*(4*x1*z1))) mod n */ } /* multiply P=(x:z) by e and puts the result in (x:z). */ void ecm_mul (mpres_t x, mpres_t z, mpz_t e, mpmod_t n, mpres_t b) { size_t l; int negated = 0; mpres_t x0, z0, x1, z1, u, v, w; /* In Montgomery coordinates, the point at infinity is (0::0) */ if (mpz_sgn (e) == 0) { mpz_set_ui (x, 0); mpz_set_ui (z, 0); return; } /* The negative of a point (x:y:z) is (x:-y:u). Since we do not compute y, e*(x::z) == (-e)*(x::z). */ if (mpz_sgn (e) < 0) { negated = 1; mpz_neg (e, e); } if (mpz_cmp_ui (e, 1) == 0) goto ecm_mul_end; mpres_init (x0, n); mpres_init (z0, n); mpres_init (x1, n); mpres_init (z1, n); mpres_init (u, n); mpres_init (v, n); mpres_init (w, n); l = mpz_sizeinbase (e, 2) - 1; /* l >= 1 */ mpres_set (x0, x, n); mpres_set (z0, z, n); duplicate (x1, z1, x0, z0, n, b, u, v, w); /* invariant: (P1,P0) = ((k+1)P, kP) where k = floor(e/2^l) */ while (l-- > 0) { if (ecm_tstbit (e, l)) /* k, k+1 -> 2k+1, 2k+2 */ { add3 (x0, z0, x0, z0, x1, z1, x, z, n, u, v, w); /* 2k+1 */ duplicate (x1, z1, x1, z1, n, b, u, v, w); /* 2k+2 */ } else /* k, k+1 -> 2k, 2k+1 */ { add3 (x1, z1, x1, z1, x0, z0, x, z, n, u, v, w); /* 2k+1 */ duplicate (x0, z0, x0, z0, n, b, u, v, w); /* 2k */ } } mpres_set (x, x0, n); mpres_set (z, z0, n); mpres_clear (x0, n); mpres_clear (z0, n); mpres_clear (x1, n); mpres_clear (z1, n); mpres_clear (u, n); mpres_clear (v, n); mpres_clear (w, n); ecm_mul_end: /* Undo negation to avoid changing the caller's e value */ if (negated) mpz_neg (e, e); } #define ADD 6.0 /* number of multiplications in an addition */ #define DUP 5.0 /* number of multiplications in a duplicate */ /* returns the number of modular multiplications for computing V_n from V_r * V_{n-r} - V_{n-2r}. ADD is the cost of an addition DUP is the cost of a duplicate */ static double lucas_cost (ecm_uint n, double v) { ecm_uint d, e, r; double c; /* cost */ d = n; r = (ecm_uint) ((double) d * v + 0.5); if (r >= n) return (ADD * (double) n); d = n - r; e = 2 * r - n; c = DUP + ADD; /* initial duplicate and final addition */ while (d != e) { if (d < e) { r = d; d = e; e = r; } if (d - e <= e / 4 && ((d + e) % 3) == 0) { /* condition 1 */ d = (2 * d - e) / 3; e = (e - d) / 2; c += 3.0 * ADD; /* 3 additions */ } else if (d - e <= e / 4 && (d - e) % 6 == 0) { /* condition 2 */ d = (d - e) / 2; c += ADD + DUP; /* one addition, one duplicate */ } else if ((d + 3) / 4 <= e) { /* condition 3 */ d -= e; c += ADD; /* one addition */ } else if ((d + e) % 2 == 0) { /* condition 4 */ d = (d - e) / 2; c += ADD + DUP; /* one addition, one duplicate */ } /* now d+e is odd */ else if (d % 2 == 0) { /* condition 5 */ d /= 2; c += ADD + DUP; /* one addition, one duplicate */ } /* now d is odd and e is even */ else if (d % 3 == 0) { /* condition 6 */ d = d / 3 - e; c += 3.0 * ADD + DUP; /* three additions, one duplicate */ } else if ((d + e) % 3 == 0) { /* condition 7 */ d = (d - 2 * e) / 3; c += 3.0 * ADD + DUP; /* three additions, one duplicate */ } else if ((d - e) % 3 == 0) { /* condition 8 */ d = (d - e) / 3; c += 3.0 * ADD + DUP; /* three additions, one duplicate */ } else /* necessarily e is even: catches all cases */ { /* condition 9 */ e /= 2; c += ADD + DUP; /* one addition, one duplicate */ } } return c; } /* computes kP from P=(xA:zA) and puts the result in (xA:zA). Assumes k>2. WARNING! The calls to add3() assume that the two input points are distinct, which is not neccessarily satisfied. The result can be that in rare cases the point at infinity (z==0) results when it shouldn't. A test case is echo 33554520197234177 | ./ecm -sigma 2046841451 373 1 which finds the prime even though it shouldn't (23^2=529 divides order). This is not a problem for ECM since at worst we'll find a factor we shouldn't have found. For other purposes (i.e. primality proving) this would have to be fixed first. */ static void prac (mpres_t xA, mpres_t zA, ecm_uint k, mpmod_t n, mpres_t b, mpres_t u, mpres_t v, mpres_t w, mpres_t xB, mpres_t zB, mpres_t xC, mpres_t zC, mpres_t xT, mpres_t zT, mpres_t xT2, mpres_t zT2) { ecm_uint d, e, r, i = 0, nv; double c, cmin; __mpz_struct *tmp; #define NV 10 /* 1/val[0] = the golden ratio (1+sqrt(5))/2, and 1/val[i] for i>0 is the real number whose continued fraction expansion is all 1s except for a 2 in i+1-st place */ static double val[NV] = { 0.61803398874989485, 0.72360679774997897, 0.58017872829546410, 0.63283980608870629, 0.61242994950949500, 0.62018198080741576, 0.61721461653440386, 0.61834711965622806, 0.61791440652881789, 0.61807966846989581}; /* for small n, it makes no sense to try 10 different Lucas chains */ nv = mpz_size ((mpz_ptr) n); if (nv > NV) nv = NV; if (nv > 1) { /* chooses the best value of v */ for (d = 0, cmin = ADD * (double) k; d < nv; d++) { c = lucas_cost (k, val[d]); if (c < cmin) { cmin = c; i = d; } } } d = k; r = (ecm_uint) ((double) d * val[i] + 0.5); /* first iteration always begins by Condition 3, then a swap */ d = k - r; e = 2 * r - k; mpres_set (xB, xA, n); mpres_set (zB, zA, n); /* B=A */ mpres_set (xC, xA, n); mpres_set (zC, zA, n); /* C=A */ duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */ while (d != e) { if (d < e) { r = d; d = e; e = r; mpres_swap (xA, xB, n); mpres_swap (zA, zB, n); } /* do the first line of Table 4 whose condition qualifies */ if (d - e <= e / 4 && ((d + e) % 3) == 0) { /* condition 1 */ d = (2 * d - e) / 3; e = (e - d) / 2; add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */ add3 (xT2, zT2, xT, zT, xA, zA, xB, zB, n, u, v, w); /* T2 = f(T,A,B) */ add3 (xB, zB, xB, zB, xT, zT, xA, zA, n, u, v, w); /* B = f(B,T,A) */ mpres_swap (xA, xT2, n); mpres_swap (zA, zT2, n); /* swap A and T2 */ } else if (d - e <= e / 4 && (d - e) % 6 == 0) { /* condition 2 */ d = (d - e) / 2; add3 (xB, zB, xA, zA, xB, zB, xC, zC, n, u, v, w); /* B = f(A,B,C) */ duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */ } else if ((d + 3) / 4 <= e) { /* condition 3 */ d -= e; add3 (xT, zT, xB, zB, xA, zA, xC, zC, n, u, v, w); /* T = f(B,A,C) */ /* circular permutation (B,T,C) */ tmp = xB; xB = xT; xT = xC; xC = tmp; tmp = zB; zB = zT; zT = zC; zC = tmp; } else if ((d + e) % 2 == 0) { /* condition 4 */ d = (d - e) / 2; add3 (xB, zB, xB, zB, xA, zA, xC, zC, n, u, v, w); /* B = f(B,A,C) */ duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */ } /* now d+e is odd */ else if (d % 2 == 0) { /* condition 5 */ d /= 2; add3 (xC, zC, xC, zC, xA, zA, xB, zB, n, u, v, w); /* C = f(C,A,B) */ duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */ } /* now d is odd, e is even */ else if (d % 3 == 0) { /* condition 6 */ d = d / 3 - e; duplicate (xT, zT, xA, zA, n, b, u, v, w); /* T = 2*A */ add3 (xT2, zT2, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T2 = f(A,B,C) */ add3 (xA, zA, xT, zT, xA, zA, xA, zA, n, u, v, w); /* A = f(T,A,A) */ add3 (xT, zT, xT, zT, xT2, zT2, xC, zC, n, u, v, w); /* T = f(T,T2,C) */ /* circular permutation (C,B,T) */ tmp = xC; xC = xB; xB = xT; xT = tmp; tmp = zC; zC = zB; zB = zT; zT = tmp; } else if ((d + e) % 3 == 0) { /* condition 7 */ d = (d - 2 * e) / 3; add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */ add3 (xB, zB, xT, zT, xA, zA, xB, zB, n, u, v, w); /* B = f(T,A,B) */ duplicate (xT, zT, xA, zA, n, b, u, v, w); add3 (xA, zA, xA, zA, xT, zT, xA, zA, n, u, v, w); /* A = 3*A */ } else if ((d - e) % 3 == 0) { /* condition 8 */ d = (d - e) / 3; add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */ add3 (xC, zC, xC, zC, xA, zA, xB, zB, n, u, v, w); /* C = f(A,C,B) */ mpres_swap (xB, xT, n); mpres_swap (zB, zT, n); /* swap B and T */ duplicate (xT, zT, xA, zA, n, b, u, v, w); add3 (xA, zA, xA, zA, xT, zT, xA, zA, n, u, v, w); /* A = 3*A */ } else /* necessarily e is even here */ { /* condition 9 */ e /= 2; add3 (xC, zC, xC, zC, xB, zB, xA, zA, n, u, v, w); /* C = f(C,B,A) */ duplicate (xB, zB, xB, zB, n, b, u, v, w); /* B = 2*B */ } } add3 (xA, zA, xA, zA, xB, zB, xC, zC, n, u, v, w); ASSERT(d == 1); } /* functions for using optimal (or near-optimal) Lucas chains instead of PRAC. Added by P McLaughlin 2023 */ /* To verify that Lucas chains are being generated correctly, configure with --enable-assert */ /* generate Lucas chain for a prime >= 2. * * inputs: (1) prime p (the final chain value) * (2) chain_code for p read from input file (starting with p = 11) * * Outputs: (1) Lchain - an array of Lucas chain elements leading to p * (2) chain length */ static uint8_t generate_Lucas_chain( uint64_t prime, uint64_t chain_code, chain_element *Lchain ) { uint8_t code_fragment, chain_length, i, k; uint64_t dif; /* the code file starts at p = 11, so handle 2, 3, 5, or 7 separately */ if( prime < 11 ) { if(prime == 2) { chain_length = 1; return chain_length; } else if( prime == 3 ) { chain_length = 2; return chain_length; } else if( prime == 5 ) { Lchain[3].value = 5; Lchain[3].comp_offset_1 = 0; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 2; chain_length = 3; return chain_length; } else if( prime == 7 ) { Lchain[3].value = 4; Lchain[3].comp_offset_1 = 1; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 0; Lchain[4].value = 7; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 3; chain_length = 4; return chain_length; } else { printf("ERROR: generate_Lucas_chain entered with prime = %lu < 11 but != 2, 3, 5 or 7\n", prime); return 0; } } /* first 3 bits of code give the next two or three chain components */ code_fragment = (uint8_t)(chain_code & 0x7); chain_code >>= 3; switch( code_fragment ) { case CHAIN_START_5_8_13: Lchain[3].value = 5; Lchain[3].comp_offset_1 = 0; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 2; Lchain[4].value = 8; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 2; Lchain[5].value = 13; Lchain[5].comp_offset_1 = 0; Lchain[5].comp_offset_2 = 1; Lchain[5].dif_offset = 2; chain_length = 5; break; case CHAIN_START_5_8_11: Lchain[3].value = 5; Lchain[3].comp_offset_1 = 0; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 2; Lchain[4].value = 8; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 2; Lchain[5].value = 11; Lchain[5].comp_offset_1 = 0; Lchain[5].comp_offset_2 = 2; Lchain[5].dif_offset = 1; chain_length = 5; break; case CHAIN_START_5_8_10: Lchain[3].value = 5; Lchain[3].comp_offset_1 = 0; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 2; Lchain[4].value = 8; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 2; Lchain[5].value = 10; Lchain[5].comp_offset_1 = 1; Lchain[5].comp_offset_2 = 1; Lchain[5].dif_offset = 0; chain_length = 5; break; case CHAIN_START_5_7: Lchain[3].value = 5; Lchain[3].comp_offset_1 = 0; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 2; Lchain[4].value = 7; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 2; Lchain[4].dif_offset = 1; chain_length = 4; break; case CHAIN_START_5_6: Lchain[3].value = 5; Lchain[3].comp_offset_1 = 0; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 2; Lchain[4].value = 6; Lchain[4].comp_offset_1 = 1; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 0; chain_length = 4; break; case CHAIN_START_4_7: Lchain[3].value = 4; Lchain[3].comp_offset_1 = 1; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 0; Lchain[4].value = 7; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 3; chain_length = 4; break; case CHAIN_START_4_6: Lchain[3].value = 4; Lchain[3].comp_offset_1 = 1; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 0; Lchain[4].value = 6; Lchain[4].comp_offset_1 = 1; Lchain[4].comp_offset_2 = 1; Lchain[4].dif_offset = 0; chain_length = 4; break; case CHAIN_START_4_5: Lchain[3].value = 4; Lchain[3].comp_offset_1 = 1; Lchain[3].comp_offset_2 = 1; Lchain[3].dif_offset = 0; Lchain[4].value = 5; Lchain[4].comp_offset_1 = 0; Lchain[4].comp_offset_2 = 3; Lchain[4].dif_offset = 1; chain_length = 4; break; default: /* should never happen */ printf("ERROR: bad chain code start value in generate_Lchain = %u\n", code_fragment); return 0; } /* rebuild chain from code fragments */ while( chain_code != 0 ) { code_fragment = (uint8_t)( chain_code & 0xF ); chain_code >>= 4; switch( code_fragment ) { case 0: /* step type 1 or 4 */ { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; if( i == 3 ) { /* a rare double, but does occur */ Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 2; Lchain[ chain_length+1 ].comp_offset_2 = 2; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; } else { i = 12*(i + 1); max_continuation( Lchain, &chain_length, i ); } break; } case 1: { Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 2; dif = Lchain[ chain_length ].value - Lchain[ chain_length-2 ].value; k = 1; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; } case 2: { Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-1 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 1; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; break; } case 3: { i = 1; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 2; Lchain[ chain_length+1 ].dif_offset = 1; chain_length++; break; } case 4: { i = 1; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-1 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 1; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; break; } case 5: { i = 2; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 2; Lchain[ chain_length+1 ].dif_offset = 1; chain_length++; break; } case 6: { i = 2; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-1 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 1; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; break; } case 7: { i = 3; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 2; Lchain[ chain_length+1 ].dif_offset = 1; chain_length++; break; } case 8: { i = 3; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-1 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 1; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; break; } case 9: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; i += 4; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 2; Lchain[ chain_length+1 ].dif_offset = 1; chain_length++; break; } case 10: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; i += 4; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-1 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 1; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; break; } case 11: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; i += 8; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-2 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 2; Lchain[ chain_length+1 ].dif_offset = 1; chain_length++; break; } case 12: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; i += 8; max_continuation( Lchain, &chain_length, i ); Lchain[ chain_length+1 ].value = 2*Lchain[ chain_length-1 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 1; Lchain[ chain_length+1 ].dif_offset = 0; chain_length++; break; } case 13: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; switch( i ) { case 0: /* chain encoding did not require this step until chain length = 44 */ Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-7 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 7; dif = Lchain[ chain_length ].value - Lchain[ chain_length-7 ].value; k = 1; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; case 1: Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-8 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 8; dif = Lchain[ chain_length ].value - Lchain[ chain_length-8 ].value; k = 1; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; case 2: /* example chain 1 2 3 5 8 10 12 13 */ Lchain[ chain_length+1 ].value = Lchain[ chain_length - 2 ].value + Lchain[ chain_length - 3 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 2; Lchain[ chain_length+1 ].comp_offset_2 = 3; dif = Lchain[ chain_length - 2 ].value - Lchain[ chain_length - 3 ].value; k = 1; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; case 3: /* can occur but don't have an example (yet) */ Lchain[ chain_length+1 ].value = Lchain[ chain_length - 2 ].value + Lchain[ chain_length - 4 ].value; Lchain[ chain_length+1 ].comp_offset_1 = 2; Lchain[ chain_length+1 ].comp_offset_2 = 4; dif = Lchain[ chain_length - 2 ].value - Lchain[ chain_length - 4 ].value; k = 1; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; default: /* should never happen */ printf("ERROR: unimplemented code fragment 0xiD, i = %u\n", i); return 0; } break; } case 14: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; Lchain[ chain_length+1 ].value = Lchain[ chain_length ].value + Lchain[ chain_length-3-i ].value; Lchain[ chain_length+1 ].comp_offset_1 = 0; Lchain[ chain_length+1 ].comp_offset_2 = 3+i; dif = Lchain[ chain_length ].value - Lchain[ chain_length-3-i ].value; k = 1; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; } case 15: { i = (uint8_t)( chain_code & 0x3 ); chain_code >>= 2; Lchain[ chain_length+1 ].value = Lchain[ chain_length-1 ].value + Lchain[ chain_length-2-i ].value; Lchain[ chain_length+1 ].comp_offset_1 = 1; Lchain[ chain_length+1 ].comp_offset_2 = 2+i; dif = Lchain[ chain_length-1 ].value - Lchain[ chain_length-2-i ].value; k = 2; while( dif < Lchain[ chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ chain_length-k ].value) && (k < 15) ); Lchain[ chain_length+1 ].dif_offset = k; chain_length++; break; } } } /* finish the chain with maximum continuations until prime is reached */ i = 1; while( Lchain[chain_length].value < prime ) { max_continuation( Lchain, &chain_length, i ); } ASSERT( Lchain[ chain_length ].value == prime ); return chain_length; } /* extend the chain with maximum elements for i steps */ static void max_continuation( chain_element *Lchain, uint8_t *chain_length, uint8_t i ) { uint8_t k; uint64_t dif; Lchain[ *chain_length+1 ].value = Lchain[ *chain_length ].value + Lchain[ *chain_length-1 ].value; Lchain[ *chain_length+1 ].comp_offset_1 = 0; Lchain[ *chain_length+1 ].comp_offset_2 = 1; dif = Lchain[ *chain_length ].value - Lchain[ *chain_length-1 ].value; k = 2; while( dif < Lchain[ *chain_length-k ].value ) k++; ASSERT( (dif == Lchain[ *chain_length-k ].value) && (k < 15) ); Lchain[ *chain_length+1 ].dif_offset = k; (*chain_length)++; if( i > 1 ) { for( k = 1; k < i; k++) { Lchain[ *chain_length+1 ].value = Lchain[ *chain_length ].value + Lchain[ *chain_length-1 ].value; Lchain[ *chain_length+1 ].comp_offset_1 = 0; Lchain[ *chain_length+1 ].comp_offset_2 = 1; Lchain[ *chain_length+1 ].dif_offset = 2; (*chain_length)++; } } } /* Input: x is initial point A is curve parameter in Montgomery's form: g*y^2*z = x^3 + a*x^2*z + x*z^2 n is the number to factor B1 is the stage 1 bound Output: If a factor is found, it is returned in f. Otherwise, x contains the x-coordinate of the point computed in stage 1 (with z coordinate normalized to 1). B1done is set to B1 if stage 1 completed normally, or to the largest prime processed if interrupted, but never to a smaller value than B1done was upon function entry. Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise ECM_NO_FACTOR_FOUND */ static int ecm_stage1 (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1, double *B1done, mpz_t go, int (*stop_asap)(void), char *chkfilename) { mpres_t b, z, u, v, w, xB, zB, xC, zC, xT, zT, xT2, zT2; uint64_t p, r, last_chkpnt_p; int ret = ECM_NO_FACTOR_FOUND; long last_chkpnt_time; prime_info_t prime_info; /* Lucas chain code file mods */ uint64_t chain_code; uint8_t dum, chain_length; int32_t i; FILE *chain_code_file = NULL; chain_element Lchain[64]; uint8_t using_code_file; /* logical */ /* Elliptic curve states as we follow the Lucas chain */ mpres_t LCS_x[16]; mpres_t LCS_z[16]; uint8_t base_indx, next_indx, s1_indx, s2_indx, dif_indx; /* end mods */ prime_info_init (prime_info); mpres_init (b, n); mpres_init (z, n); mpres_init (u, n); mpres_init (v, n); mpres_init (w, n); mpres_init (xB, n); mpres_init (zB, n); mpres_init (xC, n); mpres_init (zC, n); mpres_init (xT, n); mpres_init (zT, n); mpres_init (xT2, n); mpres_init (zT2, n); /* Lucas chain code file mods */ for(i = 0; i < 16; i++) { mpres_init ( LCS_x[i], n); mpres_init ( LCS_z[i], n); } using_code_file = 0; if( B1 > *B1done ) { chain_code_file = fopen("Lchain_codes.dat", "r"); if(chain_code_file != (FILE *)NULL ) { using_code_file = 1; outputf (OUTPUT_NORMAL, "Using Lucas chain codes\n"); /* the first 3 chain elements are always value = 1, 2, and 3, respectively */ Lchain[0].value = 1; Lchain[1].value = 2; Lchain[1].comp_offset_1 = 0; Lchain[1].comp_offset_2 = 0; Lchain[1].dif_offset = 0; Lchain[2].value = 3; Lchain[2].comp_offset_1 = 0; Lchain[2].comp_offset_2 = 1; Lchain[2].dif_offset = 1; } else { outputf (OUTPUT_VERBOSE, "Lchain_codes.dat file failed to open, using prac\n"); } } chain_length = 0; /* not necessary but stops a compiler warning */ /* end mods */ last_chkpnt_time = cputime (); mpres_set_ui (z, 1, n); mpres_add_ui (b, A, 2, n); mpres_div_2exp (b, b, 2, n); /* b == (A0+2)*B/4 */ /* preload group order */ if (go != NULL) ecm_mul (x, z, go, n, b); /* prac() wants multiplicands > 2 */ for (r = 2; r <= B1; r *= 2) if (r > *B1done) duplicate (x, z, x, z, n, b, u, v, w); /* We'll do 3 manually, too (that's what ecm4 did..) */ for (r = 3; r <= B1; r *= 3) if (r > *B1done) { duplicate (xB, zB, x, z, n, b, u, v, w); add3 (x, z, x, z, xB, zB, x, z, n, u, v, w); } if( using_code_file ) { /* copy x & z to LCS_x[0] & lCS_z[0] */ mpres_set_z (LCS_x[0], x, n); mpres_set_z (LCS_z[0], z, n); base_indx = 0; } last_chkpnt_p = 3; p = getprime_mt (prime_info); /* Puts 3 into p. Next call gives 5 */ for (p = getprime_mt (prime_info); p <= B1; p = getprime_mt (prime_info)) { if(using_code_file) { if(p >= 11) /* code file starts at p = 11 */ dum = fread((int8_t *)&chain_code, sizeof(uint64_t), 1, chain_code_file); if(dum || (p < 11)) { /* the following call doesn't need a chain code for p < 11 */ chain_length = generate_Lucas_chain( p, chain_code, Lchain ); } else /* reached end of code file; revert to use prac */ { /* copy final LCS_x,z values back to x,z */ mpres_set_z (x, LCS_x[base_indx], n); mpres_set_z (z, LCS_z[base_indx], n); fclose(chain_code_file); using_code_file = 0; outputf (OUTPUT_NORMAL, "Reached Lchain_codes.dat EOF at p = %lu, reverting to use prac\n", p); } } /* end if( using_code_file) */ for (r = p; r <= B1; r *= p) { if (r > *B1done) { if(using_code_file) { next_indx = (base_indx + 1) & 0xF; duplicate (LCS_x[next_indx], LCS_z[next_indx], LCS_x[base_indx], LCS_z[base_indx], n, b, u, v, w); base_indx = next_indx; for(i=2; i<=chain_length; i++) { next_indx = (base_indx + 1) & 0xF; s1_indx = (base_indx - Lchain[i].comp_offset_1) & 0xF; if(Lchain[i].dif_offset == 0) { duplicate (LCS_x[next_indx], LCS_z[next_indx], LCS_x[s1_indx], LCS_z[s1_indx], n, b, u, v, w); } else { s2_indx = (base_indx - Lchain[i].comp_offset_2) & 0xF; dif_indx = (base_indx - Lchain[i].dif_offset) & 0xF; add3 (LCS_x[next_indx], LCS_z[next_indx], LCS_x[s1_indx], LCS_z[s1_indx], LCS_x[s2_indx], LCS_z[s2_indx], LCS_x[dif_indx], LCS_z[dif_indx], n, u, v, w); } base_indx = next_indx; } } else prac (x, z, (ecm_uint) p, n, b, u, v, w, xB, zB, xC, zC, xT, zT, xT2, zT2); } if(using_code_file) { if (mpres_is_zero (LCS_z[base_indx], n)) { outputf (OUTPUT_VERBOSE, "Reached point at infinity, %.0f divides " "group orders\n", p); break; } } else { if (mpres_is_zero (z, n)) { outputf (OUTPUT_VERBOSE, "Reached point at infinity, %.0f divides " "group orders\n", p); break; } } if (stop_asap != NULL && (*stop_asap) ()) { outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p); break; } if (chkfilename != NULL && p > last_chkpnt_p + 10000 && elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD) { if(using_code_file) writechkfile (chkfilename, ECM_ECM, MAX(p, *B1done), n, A, LCS_x[base_indx], NULL, LCS_z[base_indx]); else writechkfile (chkfilename, ECM_ECM, MAX(p, *B1done), n, A, x, NULL, z); last_chkpnt_p = p; last_chkpnt_time = cputime (); } } } /* If stage 1 finished normally, p is the smallest prime >B1 here. In that case, set to B1 */ if (p > B1) p = B1; if (p > *B1done) *B1done = p; if(using_code_file) { /* copy final LCS_x,z values back to x,z */ mpres_set_z (x, LCS_x[base_indx], n); mpres_set_z (z, LCS_z[base_indx], n); fclose(chain_code_file); } if (chkfilename != NULL) writechkfile (chkfilename, ECM_ECM, *B1done, n, A, x, NULL, z); prime_info_clear (prime_info); if (!mpres_invert (u, z, n)) /* Factor found? */ { mpres_gcd (f, z, n); ret = ECM_FACTOR_FOUND_STEP1; } mpres_mul (x, x, u, n); for(i = 15; i >= 0; i--) { mpres_clear ( LCS_z[i], n); mpres_clear ( LCS_x[i], n); } mpres_clear (zT2, n); mpres_clear (xT2, n); mpres_clear (zT, n); mpres_clear (xT, n); mpres_clear (zC, n); mpres_clear (xC, n); mpres_clear (zB, n); mpres_clear (xB, n); mpres_clear (w, n); mpres_clear (v, n); mpres_clear (u, n); mpres_clear (z, n); mpres_clear (b, n); return ret; } #define DEBUG_EC_W 0 #ifdef HAVE_ADDLAWS /* Input: when Etype == ECM_EC_TYPE_WEIERSTRASS*: (x, y) is initial point A is curve parameter in Weierstrass's form: Y^2 = X^3 + A*X + B, where B = y^2-(x^3+A*x) is implicit when Etype == ECM_EC_TYPE_TWISTED_HESSIAN: (x, y) is initial point A=a/d is curve parameter in Hessian form: a*X^3+Y^3+Z^3=d*X*Y*Z n is the number to factor B1 is the stage 1 bound batch_s = prod(p^e <= B1) if != 1 Output: If a factor is found, it is returned in f. Otherwise, (x, y) contains the point computed in stage 1. B1done is set to B1 if stage 1 completed normally, or to the largest prime processed if interrupted, but never to a smaller value than B1done was upon function entry. Return value: ECM_FACTOR_FOUND_STEP1 if a factor is found, otherwise ECM_NO_FACTOR_FOUND */ static int ecm_stage1_W (mpz_t f, ell_curve_t E, ell_point_t P, mpmod_t n, double B1, double *B1done, mpz_t batch_s, mpz_t go, int (*stop_asap)(void), char *chkfilename) { mpres_t xB; ell_point_t Q; uint64_t p = 0, r, last_chkpnt_p; int ret = ECM_NO_FACTOR_FOUND, status; long last_chkpnt_time; prime_info_t prime_info; prime_info_init (prime_info); mpres_init (xB, n); ell_point_init(Q, E, n); last_chkpnt_time = cputime (); #if DEBUG_EC_W >= 2 gmp_printf("N:=%Zd;\n", n->orig_modulus); printf("E:="); ell_curve_print(E, n); printf("E:=[E[4], E[5]];\n"); printf("P:="); ell_point_print(P, E, n); printf("; Q:=P;\n"); #endif /* preload group order */ if (go != NULL){ if (ell_point_mul (f, Q, go, P, E, n) == 0){ ret = ECM_FACTOR_FOUND_STEP1; goto end_of_stage1_w; } ell_point_set(P, Q, E, n); } #if DEBUG_EC_W >= 1 gmp_printf("go:=%Zd;\n", go); printf("goP:="); ell_point_print(P, E, n); printf(";\n"); #endif if(mpz_cmp_ui(batch_s, 1) == 0){ outputf (OUTPUT_VERBOSE, "Using traditional approach to Step 1\n"); for (r = 2; r <= B1; r *= 2) if (r > *B1done){ if(ell_point_duplicate (f, Q, P, E, n) == 0){ ret = ECM_FACTOR_FOUND_STEP1; goto end_of_stage1_w; } ell_point_set(P, Q, E, n); #if DEBUG_EC_W >= 2 printf("P%ld:=", (long)r); ell_point_print(P, E, n); printf(";\n"); printf("Q:=EcmMult(2, Q, E, N);\n"); printf("(Q[1]*P%ld[3]-Q[3]*P%ld[1]) mod N;\n",(long)r,(long)r); #endif } last_chkpnt_p = 3; for (p = getprime_mt (prime_info); p <= B1; p = getprime_mt (prime_info)){ for (r = p; r <= B1; r *= p){ if (r > *B1done){ mpz_set_ui(f, (ecm_uint)p); status = ell_point_mul (f, Q, f, P, E, n); if(status == 0){ } else if(E->law == ECM_LAW_HOMOGENEOUS){ if(E->type == ECM_EC_TYPE_TWISTED_HESSIAN) mpres_gcd(f, Q->x, n); else mpres_gcd(f, Q->z, n); // gmp_printf("gcd=%Zd\n", f); if(mpz_cmp(f, n->orig_modulus) < 0 && mpz_cmp_ui(f, 1) > 0) status = 0; } if(status == 0){ ret = ECM_FACTOR_FOUND_STEP1; goto end_of_stage1_w; } #if DEBUG_EC_W >= 2 printf("R%ld:=", (long)r); ell_point_print(Q, E, n); printf(";\nQ:=EcmMult(%ld, Q, E, N);\n", (long)p); printf("(Q[1]*R%ld[3]-Q[3]*R%ld[1]) mod N;\n",(long)r,(long)r); #endif ell_point_set(P, Q, E, n); } } if (ell_point_is_zero (P, E, n)){ outputf (OUTPUT_VERBOSE, "Reached point at infinity, " "%.0f divides group orders\n", p); break; } if (stop_asap != NULL && (*stop_asap) ()){ outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p); break; } if (chkfilename != NULL && p > last_chkpnt_p + 10000 && elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD){ writechkfile (chkfilename, ECM_ECM, MAX(p, *B1done), n, E->a4, P->x, P->y, P->z); last_chkpnt_p = p; last_chkpnt_time = cputime (); } } } else{ #if USE_ADD_SUB_CHAINS == 0 /* keeping it simple */ if (ell_point_mul (f, Q, batch_s, P, E, n) == 0){ ret = ECM_FACTOR_FOUND_STEP1; goto end_of_stage1_w; } #else /* batch mode and special coding... */ short *S = NULL; size_t iS; int w; add_sub_unpack(&w, &S, &iS, batch_s); if (ell_point_mul_add_sub_with_S(f, Q, P, E, n, w, S, iS) == 0){ ret = ECM_FACTOR_FOUND_STEP1; } #endif ell_point_set(P, Q, E, n); p = B1; } end_of_stage1_w: /* If stage 1 finished normally, p is the smallest prime > B1 here. In that case, set to B1 */ if (p > B1) p = B1; if (p > *B1done) *B1done = p; if (chkfilename != NULL) writechkfile (chkfilename, ECM_ECM, *B1done, n, E->a4, P->x, P->y,P->z); prime_info_clear (prime_info); #if DEBUG_EC_W >= 2 printf("lastP="); ell_point_print(P, E, n); printf("\n"); #endif if(ret != ECM_FACTOR_FOUND_STEP1){ if(ell_point_is_zero(P, E, n) == 1){ /* too bad */ ell_point_set_to_zero(P, E, n); mpz_set(f, n->orig_modulus); ret = ECM_FACTOR_FOUND_STEP1; } else{ /* for affine case, z = 1 anyway */ if(E->law == ECM_LAW_HOMOGENEOUS){ if (!mpres_invert (xB, P->z, n)){ /* Factor found? */ mpres_gcd (f, P->z, n); gmp_printf("# factor found during normalization: %Zd\n", f); ret = ECM_FACTOR_FOUND_STEP1; } else{ /* normalize to get (x:y:1) valid in W or H form... */ #if DEBUG_EC_W >= 2 mpres_get_z(f, xB, n); gmp_printf("1/z=%Zd\n", f); #endif mpres_mul (P->x, P->x, xB, n); mpres_mul (P->y, P->y, xB, n); #if DEBUG_EC_W >= 2 mpres_get_z(f, P->x, n); gmp_printf("x/z=%Zd\n", f); mpres_get_z(f, P->y, n); gmp_printf("y/z=%Zd\n", f); #endif mpres_set_ui (P->z, 1, n); } } } } mpres_clear (xB, n); ell_point_clear(Q, E, n); return ret; } #endif /* choose "optimal" S according to step 2 range B2 */ int choose_S (mpz_t B2len) { if (mpz_cmp_d (B2len, 1e7) < 0) return 1; /* x^1 */ else if (mpz_cmp_d (B2len, 1e8) < 0) return 2; /* x^2 */ else if (mpz_cmp_d (B2len, 1e9) < 0) return -3; /* Dickson(3) */ else if (mpz_cmp_d (B2len, 1e10) < 0) return -6; /* Dickson(6) */ else if (mpz_cmp_d (B2len, 3e11) < 0) return -12; /* Dickson(12) */ else return -30; /* Dickson(30) */ } #define DIGITS_START 35 #define DIGITS_INCR 5 #define DIGITS_END 80 void print_expcurves (double B1, const mpz_t B2, unsigned long dF, unsigned long k, int S, int param) { double prob; int i, j; char sep, outs[128], flt[16]; double smoothness_correction; if (param == ECM_PARAM_SUYAMA || param == ECM_PARAM_BATCH_2) smoothness_correction = 1.0; else if (param == ECM_PARAM_BATCH_SQUARE) smoothness_correction = EXTRA_SMOOTHNESS_SQUARE; else if (param == ECM_PARAM_BATCH_32BITS_D) smoothness_correction = EXTRA_SMOOTHNESS_32BITS_D; else /* This case should never happen */ smoothness_correction = 0.0; for (i = DIGITS_START, j = 0; i <= DIGITS_END; i += DIGITS_INCR) j += sprintf (outs + j, "%u%c", i, (i < DIGITS_END) ? '\t' : '\n'); outs[j] = '\0'; outputf (OUTPUT_VERBOSE, "Expected number of curves to find a factor " "of n digits (assuming one exists):\n%s", outs); for (i = DIGITS_START; i <= DIGITS_END; i += DIGITS_INCR) { sep = (i < DIGITS_END) ? '\t' : '\n'; prob = ecmprob (B1, mpz_get_d (B2), /* smoothness depends on the parametrization */ pow (10., i - .5) / smoothness_correction, (double) dF * dF * k, S); if (prob > 1. / 10000000) outputf (OUTPUT_VERBOSE, "%.0f%c", floor (1. / prob + .5), sep); else if (prob > 0.) { /* on Windows: 2.6e+009 or 2.6e+025 (8 characters in string) */ /* on Linux : 2.6e+09 or 2.6e+25 (7 characters in string) */ /* This will normalize the output so that the Windows ouptut will match the Linux output */ if (sprintf (flt, "%.2g", floor (1. / prob + .5)) == 8) memmove (&flt[5], &flt[6], strlen(flt) - 5); outputf (OUTPUT_VERBOSE, "%s%c", flt, sep); } else outputf (OUTPUT_VERBOSE, "Inf%c", sep); } } void print_exptime (double B1, const mpz_t B2, unsigned long dF, unsigned long k, int S, double tottime, int param) { double prob, exptime; int i, j; char sep, outs[128]; double smoothness_correction; if (param == ECM_PARAM_SUYAMA || param == ECM_PARAM_BATCH_2) smoothness_correction = 1.0; else if (param == ECM_PARAM_BATCH_SQUARE) smoothness_correction = EXTRA_SMOOTHNESS_SQUARE; else if (param == ECM_PARAM_BATCH_32BITS_D) smoothness_correction = EXTRA_SMOOTHNESS_32BITS_D; else /* This case should never happen */ smoothness_correction = 0.0; for (i = DIGITS_START, j = 0; i <= DIGITS_END; i += DIGITS_INCR) j += sprintf (outs + j, "%u%c", i, (i < DIGITS_END) ? '\t' : '\n'); outs[j] = '\0'; outputf (OUTPUT_VERBOSE, "Expected time to find a factor of n digits:\n%s", outs); for (i = DIGITS_START; i <= DIGITS_END; i += DIGITS_INCR) { sep = (i < DIGITS_END) ? '\t' : '\n'; prob = ecmprob (B1, mpz_get_d (B2), /* in batch mode, the extra smoothness is smaller */ pow (10., i - .5) / smoothness_correction, (double) dF * dF * k, S); exptime = (prob > 0.) ? tottime / prob : HUGE_VAL; outputf (OUTPUT_TRACE, "Digits: %d, Total time: %.0f, probability: " "%g, expected time: %.0f\n", i, tottime, prob, exptime); if (exptime < 1000.) outputf (OUTPUT_VERBOSE, "%.0fms%c", exptime, sep); else if (exptime < 60000.) /* One minute */ outputf (OUTPUT_VERBOSE, "%.2fs%c", exptime / 1000., sep); else if (exptime < 3600000.) /* One hour */ outputf (OUTPUT_VERBOSE, "%.2fm%c", exptime / 60000., sep); else if (exptime < 86400000.) /* One day */ outputf (OUTPUT_VERBOSE, "%.2fh%c", exptime / 3600000., sep); else if (exptime < 31536000000.) /* One year */ outputf (OUTPUT_VERBOSE, "%.2fd%c", exptime / 86400000., sep); else if (exptime < 31536000000000.) /* One thousand years */ outputf (OUTPUT_VERBOSE, "%.2fy%c", exptime / 31536000000., sep); else if (exptime < 31536000000000000.) /* One million years */ outputf (OUTPUT_VERBOSE, "%.0fy%c", exptime / 31536000000., sep); else if (prob > 0.) outputf (OUTPUT_VERBOSE, "%.1gy%c", exptime / 31536000000., sep); else outputf (OUTPUT_VERBOSE, "Inf%c", sep); } } /* y should be NULL for P+1, and P-1, it contains the y coordinate for the Weierstrass form for ECM (when sigma_is_A = -1). */ void print_B1_B2_poly (int verbosity, int method, double B1, double B1done, const mpz_t B2min_param, const mpz_t B2min, const mpz_t B2, int S, const mpz_t sigma, int sigma_is_A, int Etype, const mpz_t y, int param, unsigned int nb_curves) { ASSERT ((method == ECM_ECM) || (y == NULL)); ASSERT ((-1 <= sigma_is_A) && (sigma_is_A <= 1)); ASSERT (param != ECM_PARAM_DEFAULT || sigma_is_A == 1 || sigma_is_A == -1); if (test_verbose (verbosity)) { outputf (verbosity, "Using "); if (ECM_IS_DEFAULT_B1_DONE(B1done)) outputf (verbosity, "B1=%1.0f, ", B1); else outputf (verbosity, "B1=%1.0f-%1.0f, ", B1done, B1); if (mpz_sgn (B2min_param) < 0) outputf (verbosity, "B2=%Zd", B2); else outputf (verbosity, "B2=%Zd-%Zd", B2min, B2); if (S > 0) outputf (verbosity, ", polynomial x^%u", S); else if (S < 0) outputf (verbosity, ", polynomial Dickson(%u)", -S); /* don't print in resume case, since x0 is saved in resume file */ if (method == ECM_ECM) { if (sigma_is_A == 1) outputf (verbosity, ", A=%Zd", sigma); else if (sigma_is_A == 0) { if (nb_curves > 1) { mpz_t end; mpz_init(end); mpz_add_ui (end, sigma, nb_curves-1); outputf (verbosity, ", sigma=%d:%Zd-%Zd", param, sigma, end); mpz_clear(end); outputf (verbosity, " (%u curves)", nb_curves); } else outputf (verbosity, ", sigma=%d:%Zd", param, sigma); } else{ if (Etype == ECM_EC_TYPE_WEIERSTRASS) outputf (verbosity, ", Weierstrass(A=%Zd,y=%Zd)", sigma, y); else if (Etype == ECM_EC_TYPE_HESSIAN) outputf (verbosity, ", Hessian(D=%Zd,y=%Zd)", sigma, y); else if (Etype == ECM_EC_TYPE_TWISTED_HESSIAN) outputf (verbosity, ", twisted Hessian(y=%Zd)", y); } } else if (ECM_IS_DEFAULT_B1_DONE(B1done)) /* in case of P-1 or P+1, we store the initial point in sigma */ outputf (verbosity, ", x0=%Zd", sigma); outputf (verbosity, "\n"); } } /* Compute parameters for stage 2 */ int set_stage_2_params (mpz_t B2, const mpz_t B2_parm, mpz_t B2min, const mpz_t B2min_parm, root_params_t *root_params, double B1, unsigned long *k, const int S, int use_ntt, int *po2, unsigned long *dF, char *TreeFilename, double maxmem, int Fermat, mpmod_t modulus) { mpz_set (B2min, B2min_parm); mpz_set (B2, B2_parm); mpz_init (root_params->i0); /* set second stage bound B2: when using polynomial multiplication of complexity n^alpha, stage 2 has complexity about B2^(alpha/2), and we want stage 2 to take about half of stage 1, thus we choose B2 = (c*B1)^(2/alpha). Experimentally, c=1/4 seems to work well. For Toom-Cook 3, this gives alpha=log(5)/log(3), and B2 ~ (c*B1)^1.365. For Toom-Cook 4, this gives alpha=log(7)/log(4), and B2 ~ (c*B1)^1.424. */ /* We take the cost of P+1 stage 1 to be about twice that of P-1. Since nai"ve P+1 and ECM cost respectively 2 and 11 multiplies per addition and duplicate, and both are optimized with PRAC, we can assume the ratio remains about 11/2. */ /* Also scale B2 by what the user said (or by the default scaling of 1.0) */ if (ECM_IS_DEFAULT_B2(B2)) mpz_set_d (B2, pow (ECM_COST * B1, DEFAULT_B2_EXPONENT)); /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); /* Let bestD determine parameters for root generation and the effective B2 */ if (use_ntt) *po2 = 1; root_params->d2 = 0; /* Enable automatic choice of d2 */ if (bestD (root_params, k, dF, B2min, B2, *po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) return ECM_ERROR; /* Set default degree for Brent-Suyama extension */ /* We try to keep the time used by the Brent-Suyama extension at about 10% of the stage 2 time */ /* Degree S Dickson polys and x^S are equally fast for ECM, so we go for the better Dickson polys whenever possible. For S == 1, 2, they behave identically. */ root_params->S = S; if (root_params->S == ECM_DEFAULT_S) { if (Fermat > 0) { /* For Fermat numbers, default is 1 (no Brent-Suyama) */ root_params->S = 1; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); root_params->S = choose_S (t); mpz_clear (t); } } return ECM_NO_FACTOR_FOUND; } /* Input: x is starting point or zero y is used for Weierstrass curves (even in Step1) sigma is sigma value (if x is set to zero) or A parameter (if x is non-zero) of curve n is the number to factor go is the initial group order to preload B1, B2 are the stage 1/stage 2 bounds, respectively B2min the lower bound for stage 2 k is the number of blocks to do in stage 2 S is the degree of the Suyama-Brent extension for stage 2 verbose is verbosity level: 0 no output, 1 normal output, 2 diagnostic output. sigma_is_A: If true, the sigma parameter contains the curve's A value Etype zE is a curve that is used when a special torsion group was used; in that case, (x, y) must be a point on E. Output: f is the factor found. Return value: ECM_FACTOR_FOUND_STEPn if a factor was found, ECM_NO_FACTOR_FOUND if no factor was found, ECM_ERROR in case of error. (x, y) contains the new point at the end of Stage 1. */ int ecm (mpz_t f, mpz_t x, mpz_t y, int param, mpz_t sigma, mpz_t n, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, unsigned long k, const int S, int verbose, int repr, int nobase2step2, int use_ntt, int sigma_is_A, ell_curve_t zE, FILE *os, FILE* es, char *chkfilename, char *TreeFilename, double maxmem, double stage1time, gmp_randstate_t rng, int (*stop_asap)(void), mpz_t batch_s, double *batch_last_B1_used, ATTRIBUTE_UNUSED double gw_k, ATTRIBUTE_UNUSED unsigned long gw_b, ATTRIBUTE_UNUSED unsigned long gw_n, ATTRIBUTE_UNUSED signed long gw_c, ATTRIBUTE_UNUSED signed long gw_cl_flag) { int youpi = ECM_NO_FACTOR_FOUND; int base2 = 0; /* If n is of form 2^n[+-]1, set base to [+-]n */ int Fermat = 0; /* If base2 > 0 is a power of 2, set Fermat to base2 */ int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpmod_t modulus; curve P; ell_curve_t E; #ifdef HAVE_ADDLAWS ell_point_t PE; #endif mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; #ifdef HAVE_GWNUM unsigned long kbnc_bitsize; unsigned long gw_knbc_threshold; unsigned long gw_spec_div_threshold; unsigned long gw_generic_threshold; #endif root_params_t root_params; /* 1: sigma contains A from Montgomery form By^2 = x^3 + Ax^2 + x 0: sigma contains sigma -1: sigma contains A from Weierstrass form y^2 = x^3 + Ax + B, and y contains y */ ASSERT((-1 <= sigma_is_A) && (sigma_is_A <= 1)); ASSERT((GMP_NUMB_BITS == 32) || (GMP_NUMB_BITS == 64)); set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? stdout : es; #ifdef MPRESN_NO_ADJUSTMENT /* When no adjustment is made in mpresn_ functions, N should be smaller than B^n/16 */ if (mpz_sizeinbase (n, 2) > mpz_size (n) * GMP_NUMB_BITS - 4) { outputf (OUTPUT_ERROR, "Error, N should be smaller than B^n/16\n"); return ECM_ERROR; } #endif /* if a batch mode is requested by the user, this implies ECM_MOD_MODMULN */ if (repr == ECM_MOD_DEFAULT && IS_BATCH_MODE(param)) repr = ECM_MOD_MODMULN; /* choose the arithmetic used before the parametrization, since for divisors of 2^n+/-1 the default choice param=1 might not be optimal */ if (mpmod_init (modulus, n, repr) != 0) return ECM_ERROR; repr = modulus->repr; /* If the parametrization is not given, choose it. */ if (param == ECM_PARAM_DEFAULT) param = get_default_param (sigma_is_A, *B1done, repr); /* when dealing with several input numbers, if we had already computed batch_s, but the new number uses the base-2 representation, then we are forced to use ECM_PARAM_SUYAMA, and we reset batch_s to 1 to avoid the error "-bsaves/-bloads makes sense in batch mode only" below */ if (param == ECM_PARAM_SUYAMA) mpz_set_ui (batch_s, 1); /* In batch mode, we force repr=MODMULN, B1done should be either the default value or greater than B1 x should be either 0 (undetermined) or 2 */ if (IS_BATCH_MODE(param)) { if (repr != ECM_MOD_MODMULN) { outputf (OUTPUT_ERROR, "Error, with param %d, repr should be " "ECM_MOD_MODMULN.\n", param); return ECM_ERROR; } if (!ECM_IS_DEFAULT_B1_DONE(*B1done) && *B1done < B1) { outputf (OUTPUT_ERROR, "Error, cannot resume with param %d, except " "for doing only stage 2\n", param); return ECM_ERROR; } if (sigma_is_A >= 0 && mpz_sgn (x) != 0 && mpz_cmp_ui (x, 2) != 0) { if (ECM_IS_DEFAULT_B1_DONE(*B1done)) { outputf (OUTPUT_ERROR, "Error, x0 should be equal to 2 with this " "parametrization\n"); return ECM_ERROR; } } } /* check that if ECM_PARAM_BATCH_SQUARE is used, GMP_NUMB_BITS == 64 */ if (param == ECM_PARAM_BATCH_SQUARE && GMP_NUMB_BITS == 32) { outputf (OUTPUT_ERROR, "Error, parametrization ECM_PARAM_BATCH_SQUARE " "works only with GMP_NUMB_BITS=64\n"); return ECM_ERROR; } /* check that B1 is not too large */ if (B1 > (double) ECM_UINT_MAX) { outputf (OUTPUT_ERROR, "Error, maximal step 1 bound for ECM is %lu.\n", ECM_UINT_MAX); return ECM_ERROR; } /* See what kind of number we have as that may influence optimal parameter selection. Test for base 2 number. Note: this was already done by mpmod_init. */ if (modulus->repr == ECM_MOD_BASE2) base2 = modulus->bits; /* For a Fermat number (base2 a positive power of 2) */ for (Fermat = base2; Fermat > 0 && (Fermat & 1) == 0; Fermat >>= 1); if (Fermat == 1) { Fermat = base2; po2 = 1; } else Fermat = 0; mpres_init (P.x, modulus); mpres_init (P.y, modulus); mpres_init (P.A, modulus); /* In resume case, set (P.x,P.y) from x. */ int resume = mpz_cmp_ui (x, 0); if (resume) { /* Call get_curve_from_param0() before setting P.x otherwise it would reset P.x. */ if (param == ECM_PARAM_SUYAMA) get_curve_from_param0 (f, P.A, P.x, sigma, modulus); else if (param == ECM_PARAM_BATCH_SQUARE) get_curve_from_param1 (P.A, P.x, sigma, modulus); else if (param == ECM_PARAM_BATCH_2) get_curve_from_param2 (f, P.A, P.x, sigma, modulus); else get_curve_from_param3 (P.A, P.x, sigma, modulus); mpres_set_z (P.x, x, modulus); mpres_set_ui (P.y, 1, modulus); } #ifdef HAVE_ADDLAWS ell_curve_set_z (E, zE, modulus); #else E->type = ECM_EC_TYPE_MONTGOMERY; #endif mpz_init (B2); mpz_init (B2min); youpi = set_stage_2_params (B2, B2_parm, B2min, B2min_parm, &root_params, B1, &k, S, use_ntt, &po2, &dF, TreeFilename, maxmem, Fermat,modulus); /* if the user gave B2, print that B2 on the Using B1=..., B2=... line */ if(!ECM_IS_DEFAULT_B2(B2_parm)) mpz_set (B2, B2_parm); if (youpi == ECM_ERROR) goto end_of_ecm; if (!resume && sigma_is_A == 0) { if (mpz_sgn (sigma) == 0) { youpi = get_curve_from_random_parameter (f, P.A, P.x, sigma, param, modulus, rng); if (youpi == ECM_ERROR) { outputf (OUTPUT_ERROR, "Error, invalid parametrization.\n"); goto end_of_ecm; } } else /* Compute A and x0 from given sigma values */ { if (param == ECM_PARAM_SUYAMA) youpi = get_curve_from_param0 (f, P.A, P.x, sigma, modulus); else if (param == ECM_PARAM_BATCH_SQUARE) youpi = get_curve_from_param1 (P.A, P.x, sigma, modulus); else if (param == ECM_PARAM_BATCH_2) youpi = get_curve_from_param2 (f, P.A, P.x, sigma, modulus); else if (param == ECM_PARAM_BATCH_32BITS_D) youpi = get_curve_from_param3 (P.A, P.x, sigma, modulus); else { outputf (OUTPUT_ERROR, "Error, invalid parametrization.\n"); youpi = ECM_ERROR; goto end_of_ecm; } if (youpi != ECM_NO_FACTOR_FOUND) { if (youpi == ECM_ERROR) outputf (OUTPUT_ERROR, "Error, invalid value of sigma.\n"); goto end_of_ecm; } } } else if (sigma_is_A == 1) { /* sigma contains the A value */ mpres_set_z (P.A, sigma, modulus); /* TODO: make a valid, random starting point in case none was given */ /* Problem: this may be as hard as factoring as we'd need to determine whether x^3 + a*x^2 + x is a quadratic residue or not */ /* For now, we'll just chicken out. */ /* Except for batch mode where we know that x0=2 */ if (mpz_sgn (x) == 0) { if (IS_BATCH_MODE(param)) mpres_set_ui (P.x, 2, modulus); else { outputf (OUTPUT_ERROR, "Error, -A requires a starting point (-x0 x).\n"); youpi = ECM_ERROR; goto end_of_ecm; } } else mpres_set_z (P.x, x, modulus); } /* Print B1, B2, polynomial and sigma */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_ECM, B1, *B1done, B2min_parm, B2min, B2, root_params.S, sigma, sigma_is_A, E->type, y, param, 0); #if 0 outputf (OUTPUT_VERBOSE, "b2=%1.0f, dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", b2, dF, k, root_params.d1, root_params.d2, root_params.i0); #else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, root_params.i0); #endif if (sigma_is_A == -1) /* Weierstrass or Hessian form. It is known that all curves in Weierstrass form do not admit a Montgomery form. However, we could be interested in performing some plain Step 1 on some special curves. */ { if (param != ECM_PARAM_TORSION) { mpres_set_z (P.A, sigma, modulus); /* sigma contains A */ mpres_set_z (P.x, x, modulus); mpres_set_z (P.y, y, modulus); } else { #ifdef HAVE_ADDLAWS if(E->type == ECM_EC_TYPE_WEIERSTRASS) #endif mpres_set_z(P.A, zE->a4, modulus); #ifdef HAVE_ADDLAWS else if(E->type == ECM_EC_TYPE_MONTGOMERY) mpres_set_z(P.A, zE->a2, modulus); #endif mpres_set_z (P.x, x, modulus); mpres_set_z (P.y, y, modulus); } } if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t t; mpz_init (t); mpres_get_z (t, P.A, modulus); outputf (OUTPUT_RESVERBOSE, "A=%Zd\n", t); mpres_get_z (t, P.x, modulus); outputf (OUTPUT_RESVERBOSE, "starting point: x0=%Zd\n", t); #ifdef HAVE_ADDLAWS if (E->type == ECM_EC_TYPE_WEIERSTRASS) { mpres_get_z (t, P.y, modulus); outputf (OUTPUT_RESVERBOSE, " y0=%Zd\n", t); } #endif mpz_clear (t); } if (go != NULL && mpz_cmp_ui (go, 1) > 0) outputf (OUTPUT_VERBOSE, "initial group order: %Zd\n", go); if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_cmp_d (B2min, B1) != 0) { outputf (OUTPUT_VERBOSE, "Can't compute success probabilities for B1 <> B2min\n"); } else if (param == ECM_PARAM_DEFAULT) { outputf (OUTPUT_VERBOSE, "Can't compute success probabilities " "for this parametrization.\n"); } else { rhoinit (256, 10); print_expcurves (B1, B2, dF, k, root_params.S, param); } } /* Compute s for the batch mode */ if (IS_BATCH_MODE(param) && ECM_IS_DEFAULT_B1_DONE(*B1done) && (B1 != *batch_last_B1_used || mpz_cmp_ui (batch_s, 1) <= 0)) { *batch_last_B1_used = B1; st = cputime (); /* construct the batch exponent */ compute_s (batch_s, B1, NULL); outputf (OUTPUT_VERBOSE, "Computing batch product (of %" PRIu64 " bits) of primes up to B1=%1.0f took %ldms\n", mpz_sizeinbase (batch_s, 2), B1, elltime (st, cputime ())); } st = cputime (); #ifdef HAVE_GWNUM /* gwnum only used when param == 0 and command line did not include -force-no-gwnum */ if ((param == ECM_PARAM_SUYAMA) && (gw_cl_flag >= 0)) { /* set thresholds */ if (gw_cl_flag > 0) /* -force-gwnum specified in command line */ { /* set all thresholds to minimum */ gw_knbc_threshold = 350; gw_spec_div_threshold = 350; gw_generic_threshold = 350; } else /* use default thresholds */ { gw_knbc_threshold = GWNUM_KBNC_THRESHOLD; gw_spec_div_threshold = GWNUM_SPEC_DIV_THRESHOLD; gw_generic_threshold = GWNUM_GENERIC_THRESHOLD; } /* We will use GWNUM for numbers N which have a kbnc form (b != 0) when N > 2^gw_knbc_threshold */ /* we will check if N = (2^m +/- 1)/divisors (special division). In that case gwnum will be used only if m >= gw_spec_div_threshold */ /* We will use GWNUM for numbers N of any other form (b == 0) when N > 2^gw_generic_threshold */ /* check for special division case */ if (modulus->repr == ECM_MOD_BASE2) { if ( abs (modulus->bits) >= (unsigned long) gw_spec_div_threshold ) { /* use gwnum; kbnc params may not be set */ gw_k = 1.0; gw_b = 2; gw_n = abs (modulus->bits); if ( modulus->bits < 0 ) gw_c = -1; else gw_c = 1; } else { /* don't use gwnum */ gw_b = 0; gw_k = 0.0; } } else if ( gw_b != 0 ) { kbnc_bitsize = (unsigned long) ( (log(gw_k) + (double)gw_n * log((double)gw_b) )/log(2.0) ); if (kbnc_bitsize < (unsigned long) gw_knbc_threshold) { /* don't use gwnum */ gw_b = 0; gw_k = 0.0; } } else /* gw_b == 0 */ { /* gwnum generic is not faster until input number is much larger - roughly 600 decimal digits (TBD) */ if (mpz_sizeinbase(modulus->orig_modulus, 2) >= gw_generic_threshold) { gw_k = 1.0; /* indicate that input is large enough for gwnum generic */ if (verbose > OUTPUT_NORMAL) printf ("Did not find a gwnum poly; will use gwnum generic.\n"); } else { gw_k = 0.0; /* indicate that input is too small for gwnum generic */ if (verbose > OUTPUT_NORMAL) printf ("Did not find a gwnum poly; input number is too small for gwnum generic.\n"); } } if ((gw_b != 0 || (gw_b == 0 && gw_k >= 1.0) ) && B1 >= *B1done) youpi = gw_ecm_stage1 (f, &P, modulus, B1, B1done, go, gw_k, gw_b, gw_n, gw_c); } /* end if ((param == ECM_PARAM_SUYAMA) && (gw_cl_flag >= 0)) */ /* At this point B1 == *B1done unless interrupted, or no GWNUM ecm_stage1 is available */ if (youpi != ECM_NO_FACTOR_FOUND) { outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ())); goto end_of_ecm_rhotable; } #endif /* HAVE_GWNUM */ if (B1 > *B1done || mpz_cmp_ui (go, 1) > 0) { if (IS_BATCH_MODE(param)) /* FIXME: go, stop_asap and chkfilename are ignored in batch mode */ youpi = ecm_stage1_batch (f, P.x, P.A, modulus, B1, B1done, param, batch_s); else{ #ifdef HAVE_ADDLAWS if(E->type == ECM_EC_TYPE_MONTGOMERY) #endif youpi = ecm_stage1 (f, P.x, P.A, modulus, B1, B1done, go, stop_asap, chkfilename); #ifdef HAVE_ADDLAWS else{ ell_point_init (PE, E, modulus); mpres_set (PE->x, P.x, modulus); mpres_set (PE->y, P.y, modulus); youpi = ecm_stage1_W (f, E, PE, modulus, B1, B1done, batch_s, go, stop_asap, chkfilename); mpres_set (P.x, PE->x, modulus); mpres_set (P.y, PE->y, modulus); ell_point_clear (PE, E, modulus); } #endif } } else if (mpz_sgn (x) != 0) { /* when x <> 0, we initialize P to (x:y) */ mpres_set_z (P.x, x, modulus); mpres_set_z (P.y, y, modulus); } if (stage1time > 0.) { const long st2 = elltime (st, cputime ()); const long s1t = (long) (stage1time * 1000.); outputf (OUTPUT_NORMAL, "Step 1 took %ldms (%ld in this run, %ld from previous runs)\n", st2 + s1t, st2, s1t); } else outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ())); /* Store end-of-stage-1 residue in x in case we write it to a save file, before P.x is (perhaps) converted to Weierstrass form */ mpres_get_z (x, P.x, modulus); #ifdef HAVE_ADDLAWS if (E->type == ECM_EC_TYPE_WEIERSTRASS || E->type == ECM_EC_TYPE_HESSIAN || E->type == ECM_EC_TYPE_TWISTED_HESSIAN) mpres_get_z (y, P.y, modulus); #endif if (youpi != ECM_NO_FACTOR_FOUND) goto end_of_ecm_rhotable; if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t t; mpz_init (t); mpres_get_z (t, P.x, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t); #ifdef HAVE_ADDLAWS if (E->type == ECM_EC_TYPE_WEIERSTRASS) { mpres_get_z (t, P.y, modulus); outputf (OUTPUT_RESVERBOSE, "y=%Zd\n", t); } #endif mpz_clear (t); } /* In case of a signal, we'll exit after the residue is printed. If no save file is specified, the user may still resume from the residue */ if (stop_asap != NULL && (*stop_asap) ()) goto end_of_ecm_rhotable; /* If using 2^k +/-1 modulus and 'nobase2step2' flag is set, set default (-nobase2) modular method and remap P.x, P.y, and P.A */ if (modulus->repr == ECM_MOD_BASE2 && nobase2step2) { mpz_t x_t, y_t, A_t; mpz_init (x_t); mpz_init (y_t); mpz_init (A_t); mpz_mod (x_t, P.x, modulus->orig_modulus); mpz_mod (y_t, P.y, modulus->orig_modulus); mpz_mod (A_t, P.A, modulus->orig_modulus); mpmod_clear (modulus); repr = ECM_MOD_NOBASE2; if (mpmod_init (modulus, n, repr) != 0) /* reset modulus for nobase2 */ return ECM_ERROR; /* remap x, y, and A for new modular method */ mpres_set_z (P.x, x_t, modulus); mpres_set_z (P.y, y_t, modulus); mpres_set_z (P.A, A_t, modulus); mpz_clear (x_t); mpz_clear (y_t); mpz_clear (A_t); } #ifdef HAVE_ADDLAWS if (E->type == ECM_EC_TYPE_MONTGOMERY) #endif youpi = montgomery_to_weierstrass (f, P.x, P.y, P.A, modulus); #ifdef HAVE_ADDLAWS else if (E->type == ECM_EC_TYPE_HESSIAN) { youpi = hessian_to_weierstrass (f, P.x, P.y, P.A, modulus); if(youpi == ECM_NO_FACTOR_FOUND) /* due to that non-trivial kernel(?) */ youpi = mult_by_3(f, P.x, P.y, P.A, modulus); } else if (E->type == ECM_EC_TYPE_TWISTED_HESSIAN) { mpz_t c, rm; mpz_init(c); mpz_init(rm); mpres_get_z(rm, E->a4, modulus); mpz_rootrem(c, rm, rm, 3); if(mpz_sgn(rm) != 0){ printf("ECM_EC_TYPE_TWISTED_HESSIAN: not a cube!\n"); exit(-1); } mpres_set_z(P.A, c, modulus); mpz_clear(c); mpz_clear(rm); youpi = twisted_hessian_to_weierstrass (f, P.x, P.y, P.A, E->a6, modulus); if(youpi == ECM_NO_FACTOR_FOUND){ /* due to that non-trivial kernel(?) */ youpi = mult_by_3(f, P.x, P.y, P.A, modulus); } } #endif if (test_verbose (OUTPUT_RESVERBOSE) && youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { mpz_t t; mpz_init (t); mpres_get_z (t, P.x, modulus); outputf (OUTPUT_RESVERBOSE, "After switch to Weierstrass form, " "P=(%Zd", t); mpres_get_z (t, P.y, modulus); outputf (OUTPUT_RESVERBOSE, ", %Zd)\n", t); mpres_get_z (t, P.A, modulus); outputf (OUTPUT_RESVERBOSE, "on curve Y^2 = X^3 + %Zd * X + b\n", t); mpz_clear (t); } P.disc = 0; /* FIXME: should disappear one day */ if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) youpi = stage2 (f, &P, modulus, dF, k, &root_params, use_ntt, TreeFilename, 0, stop_asap); #ifdef TIMING_CRT printf ("mpzspv_from_mpzv_slow: %dms\n", mpzspv_from_mpzv_slow_time); printf ("mpzspv_to_mpzv: %dms\n", mpzspv_to_mpzv_time); printf ("mpzspv_normalise: %dms\n", mpzspv_normalise_time); #endif end_of_ecm_rhotable: if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_cmp_d (B2min, B1) == 0 && param != ECM_PARAM_DEFAULT) { if (youpi == ECM_NO_FACTOR_FOUND && (stop_asap == NULL || !(*stop_asap)())) print_exptime (B1, B2, dF, k, root_params.S, (long) (stage1time * 1000.) + elltime (st, cputime ()), param); rhoinit (1, 0); /* Free memory of rhotable */ } } end_of_ecm: #ifdef HAVE_ADDLAWS ell_curve_clear(E, modulus); #endif mpres_clear (P.A, modulus); mpres_clear (P.y, modulus); mpres_clear (P.x, modulus); mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); mpmod_clear (modulus); return youpi; }