/* ------------------------------------------------------------------------------------ Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. SPDX-License-Identifier: Apache-2.0 OR ISC ------------------------------------------------------------------------------------ */ // Implementation of P-521 that uses Fiat-crypto for the field arithmetic // found in third_party/fiat, similarly to p256.c and p384.c #include #include #include #include #include "../bn/internal.h" #include "../cpucap/internal.h" #include "../delocate.h" #include "internal.h" #include "ec_nistp.h" #if !defined(OPENSSL_SMALL) #if defined(EC_NISTP_USE_S2N_BIGNUM) # include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" #else # if defined(EC_NISTP_USE_64BIT_LIMB) # include "../../../third_party/fiat/p521_64.h" # else # include "../../../third_party/fiat/p521_32.h" # endif #endif #if defined(EC_NISTP_USE_S2N_BIGNUM) #define P521_NLIMBS (9) typedef uint64_t p521_limb_t; typedef uint64_t p521_felem[P521_NLIMBS]; // field element static const p521_limb_t p521_felem_one[P521_NLIMBS] = { 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}; // The field characteristic p. static const p521_limb_t p521_felem_p[P521_NLIMBS] = { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x1ff}; // s2n-bignum implementation of field arithmetic #define p521_felem_add(out, in0, in1) bignum_add_p521(out, in0, in1) #define p521_felem_sub(out, in0, in1) bignum_sub_p521(out, in0, in1) #define p521_felem_opp(out, in0) bignum_neg_p521(out, in0) #define p521_felem_to_bytes(out, in0) bignum_tolebytes_p521(out, in0) #define p521_felem_from_bytes(out, in0) bignum_fromlebytes_p521(out, in0) #define p521_felem_mul(out, in0, in1) bignum_mul_p521_selector(out, in0, in1) #define p521_felem_sqr(out, in0) bignum_sqr_p521_selector(out, in0) #else // EC_NISTP_USE_S2N_BIGNUM #if defined(EC_NISTP_USE_64BIT_LIMB) // In the 64-bit case Fiat-crypto represents a field element by 9 58-bit digits. #define P521_NLIMBS (9) typedef uint64_t p521_felem[P521_NLIMBS]; // field element typedef uint64_t p521_limb_t; // One in Fiat-crypto's representation (58-bit digits). static const p521_limb_t p521_felem_one[P521_NLIMBS] = { 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}; // The field characteristic p in Fiat-crypto's representation (58-bit digits). static const p521_limb_t p521_felem_p[P521_NLIMBS] = { 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff}; #else // 64BIT; else 32BIT // In the 32-bit case Fiat-crypto represents a field element by 19 digits // with the following bit sizes: // [28, 27, 28, 27, 28, 27, 27, 28, 27, 28, 27, 28, 27, 27, 28, 27, 28, 27, 27]. #define P521_NLIMBS (19) typedef uint32_t p521_felem[P521_NLIMBS]; // field element typedef uint32_t p521_limb_t; // One in Fiat-crypto's representation. static const p521_limb_t p521_felem_one[P521_NLIMBS] = { 0x0000001, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000, 0x0000000}; // The field characteristic p in Fiat-crypto's representation. static const p521_limb_t p521_felem_p[P521_NLIMBS] = { 0xfffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0xfffffff, 0x7ffffff, 0x7ffffff}; #endif // 64BIT // Fiat-crypto implementation of field arithmetic #define p521_felem_add(out, in0, in1) fiat_secp521r1_carry_add(out, in0, in1) #define p521_felem_sub(out, in0, in1) fiat_secp521r1_carry_sub(out, in0, in1) #define p521_felem_opp(out, in0) fiat_secp521r1_carry_opp(out, in0) #define p521_felem_mul(out, in0, in1) fiat_secp521r1_carry_mul(out, in0, in1) #define p521_felem_sqr(out, in0) fiat_secp521r1_carry_square(out, in0) #define p521_felem_to_bytes(out, in0) fiat_secp521r1_to_bytes(out, in0) #define p521_felem_from_bytes(out, in0) fiat_secp521r1_from_bytes(out, in0) #endif // EC_NISTP_USE_S2N_BIGNUM static p521_limb_t p521_felem_nz(const p521_limb_t in1[P521_NLIMBS]) { p521_limb_t is_not_zero = 0; for (int i = 0; i < P521_NLIMBS; i++) { is_not_zero |= in1[i]; } #if defined(EC_NISTP_USE_S2N_BIGNUM) return is_not_zero; #else // Fiat-crypto functions may return p (the field characteristic) // instead of 0 in some cases, so we also check for that. p521_limb_t is_not_p = 0; for (int i = 0; i < P521_NLIMBS; i++) { is_not_p |= (in1[i] ^ p521_felem_p[i]); } return ~(constant_time_is_zero_w(is_not_p) | constant_time_is_zero_w(is_not_zero)); #endif } static void p521_felem_copy(p521_limb_t out[P521_NLIMBS], const p521_limb_t in1[P521_NLIMBS]) { for (size_t i = 0; i < P521_NLIMBS; i++) { out[i] = in1[i]; } } static void p521_felem_cmovznz(p521_limb_t out[P521_NLIMBS], p521_limb_t t, const p521_limb_t z[P521_NLIMBS], const p521_limb_t nz[P521_NLIMBS]) { p521_limb_t mask = constant_time_is_zero_w(t); for (size_t i = 0; i < P521_NLIMBS; i++) { out[i] = constant_time_select_w(mask, z[i], nz[i]); } } // NOTE: the input and output are in little-endian representation. static void p521_from_generic(p521_felem out, const EC_FELEM *in) { #ifdef OPENSSL_BIG_ENDIAN uint8_t tmp[P521_EC_FELEM_BYTES]; bn_words_to_little_endian(tmp, P521_EC_FELEM_BYTES, in->words, P521_EC_FELEM_WORDS); p521_felem_from_bytes(out, tmp); #else p521_felem_from_bytes(out, (const uint8_t *)in->words); #endif } // NOTE: the input and output are in little-endian representation. static void p521_to_generic(EC_FELEM *out, const p521_felem in) { // |p521_felem_to_bytes| function will write the result to the first 66 bytes // of |out| which is exactly how many bytes are needed to represent a 521-bit // element. // The number of BN_ULONGs to represent a 521-bit value is 9 and 17, when // BN_ULONG is 64-bit and 32-bit, respectively. Nine 64-bit BN_ULONGs // translate to 72 bytes, which means that we have to make sure that the // extra 6 bytes are zeroed out. To avoid confusion over 32 vs. 64 bit // systems and Fiat's vs. ours representation we zero out the whole element. #ifdef OPENSSL_BIG_ENDIAN uint8_t tmp[P521_EC_FELEM_BYTES]; p521_felem_to_bytes(tmp, in); bn_little_endian_to_words(out->words, P521_EC_FELEM_WORDS, tmp, P521_EC_FELEM_BYTES); #else OPENSSL_memset((uint8_t*)out->words, 0, sizeof(out->words)); // Convert the element to bytes. p521_felem_to_bytes((uint8_t *)out->words, in); #endif } // Finite field inversion using Fermat Little Theorem. // The code is autogenerated by the ECCKiila project: // https://arxiv.org/abs/2007.11481 static void p521_felem_inv(p521_felem output, const p521_felem t1) { /* temporary variables */ p521_felem acc, t2, t4, t8, t16, t32, t64; p521_felem t128, t256, t512, t516, t518, t519; p521_felem_sqr(acc, t1); p521_felem_mul(t2, acc, t1); p521_felem_sqr(acc, t2); p521_felem_sqr(acc, acc); p521_felem_mul(t4, acc, t2); p521_felem_sqr(acc, t4); for (int i = 0; i < 3; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t8, acc, t4); p521_felem_sqr(acc, t8); for (int i = 0; i < 7; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t16, acc, t8); p521_felem_sqr(acc, t16); for (int i = 0; i < 15; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t32, acc, t16); p521_felem_sqr(acc, t32); for (int i = 0; i < 31; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t64, acc, t32); p521_felem_sqr(acc, t64); for (int i = 0; i < 63; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t128, acc, t64); p521_felem_sqr(acc, t128); for (int i = 0; i < 127; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t256, acc, t128); p521_felem_sqr(acc, t256); for (int i = 0; i < 255; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t512, acc, t256); p521_felem_sqr(acc, t512); for (int i = 0; i < 3; i++) { p521_felem_sqr(acc, acc); } p521_felem_mul(t516, acc, t4); p521_felem_sqr(acc, t516); p521_felem_sqr(acc, acc); p521_felem_mul(t518, acc, t2); p521_felem_sqr(acc, t518); p521_felem_mul(t519, acc, t1); p521_felem_sqr(acc, t519); p521_felem_sqr(acc, acc); p521_felem_mul(output, acc, t1); } static void p521_point_double(p521_felem x_out, p521_felem y_out, p521_felem z_out, const p521_felem x_in, const p521_felem y_in, const p521_felem z_in) { ec_nistp_point_double(p521_methods(), x_out, y_out, z_out, x_in, y_in, z_in); } // p521_point_add calculates (x1, y1, z1) + (x2, y2, z2) // // The method is taken from: // http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl // adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). // // Coq transcription and correctness proof: // // static void p521_point_add(p521_felem x3, p521_felem y3, p521_felem z3, const p521_felem x1, const p521_felem y1, const p521_felem z1, const int mixed, const p521_felem x2, const p521_felem y2, const p521_felem z2) { ec_nistp_point_add(p521_methods(), x3, y3, z3, x1, y1, z1, mixed, x2, y2, z2); } #if defined(EC_NISTP_USE_S2N_BIGNUM) DEFINE_METHOD_FUNCTION(ec_nistp_meth, p521_methods) { out->felem_num_limbs = P521_NLIMBS; out->felem_num_bits = 521; out->felem_add = bignum_add_p521; out->felem_sub = bignum_sub_p521; out->felem_mul = bignum_mul_p521_selector; out->felem_sqr = bignum_sqr_p521_selector; out->felem_neg = bignum_neg_p521; out->felem_nz = p521_felem_nz; out->point_dbl = p521_point_double; out->point_add = p521_point_add; } #else DEFINE_METHOD_FUNCTION(ec_nistp_meth, p521_methods) { out->felem_num_limbs = P521_NLIMBS; out->felem_num_bits = 521; out->felem_add = fiat_secp521r1_carry_add; out->felem_sub = fiat_secp521r1_carry_sub; out->felem_mul = fiat_secp521r1_carry_mul; out->felem_sqr = fiat_secp521r1_carry_square; out->felem_neg = fiat_secp521r1_carry_opp; out->felem_nz = p521_felem_nz; out->point_dbl = p521_point_double; out->point_add = p521_point_add; } #endif // OPENSSL EC_METHOD FUNCTIONS // Takes the Jacobian coordinates (X, Y, Z) of a point and returns: // (X', Y') = (X/Z^2, Y/Z^3). static int ec_GFp_nistp521_point_get_affine_coordinates( const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out, EC_FELEM *y_out) { if (constant_time_declassify_w(ec_GFp_simple_is_at_infinity(group, point))) { OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY); return 0; } p521_felem z1, z2; p521_from_generic(z1, &point->Z); p521_felem_inv(z2, z1); p521_felem_sqr(z2, z2); if (x_out != NULL) { p521_felem x; p521_from_generic(x, &point->X); p521_felem_mul(x, x, z2); p521_to_generic(x_out, x); } if (y_out != NULL) { p521_felem y; p521_from_generic(y, &point->Y); p521_felem_sqr(z2, z2); // z^-4 p521_felem_mul(y, y, z1); // y * z p521_felem_mul(y, y, z2); // y * z^-3 p521_to_generic(y_out, y); } return 1; } static void ec_GFp_nistp521_add(const EC_GROUP *group, EC_JACOBIAN *r, const EC_JACOBIAN *a, const EC_JACOBIAN *b) { p521_felem x1, y1, z1, x2, y2, z2; p521_from_generic(x1, &a->X); p521_from_generic(y1, &a->Y); p521_from_generic(z1, &a->Z); p521_from_generic(x2, &b->X); p521_from_generic(y2, &b->Y); p521_from_generic(z2, &b->Z); p521_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2, z2); p521_to_generic(&r->X, x1); p521_to_generic(&r->Y, y1); p521_to_generic(&r->Z, z1); } static void ec_GFp_nistp521_dbl(const EC_GROUP *group, EC_JACOBIAN *r, const EC_JACOBIAN *a) { p521_felem x, y, z; p521_from_generic(x, &a->X); p521_from_generic(y, &a->Y); p521_from_generic(z, &a->Z); p521_point_double(x, y, z, x, y, z); p521_to_generic(&r->X, x); p521_to_generic(&r->Y, y); p521_to_generic(&r->Z, z); } // ---------------------------------------------------------------------------- // SCALAR MULTIPLICATION OPERATIONS // ---------------------------------------------------------------------------- // // The method for computing scalar products in functions: // - |ec_GFp_nistp521_point_mul|, // - |ec_GFp_nistp521_point_mul_base|, // - |ec_GFp_nistp521_point_mul_public|, // is adapted from ECCKiila project (https://arxiv.org/abs/2007.11481). // The main difference is that we use a window of size 7 instead of 5 for the // first two functions. The potential issue with window sizes is that for some // sizes a scalar can be found such that a case of point doubling instead of // point addition happens in the scalar multiplication. This would make the // multiplication non constant-time. Therefore, such window sizes have to be // avoided. The windows size of 7 is chosen based on analysis analogous to // the one in |ec_GFp_nistp_recode_scalar_bits| function in |util.c| file. // See the analysis at the bottom of this file. // // Moreover, the order in which the digits of the scalar are processed in // |ec_GFp_nistp521_point_mul_base| is different from the ECCKiila project, to // ensure that the least significant digit is processed last which together // with the window size 7 guarantees constant-time execution of the function. // // Another difference is that in |ec_GFp_nistp521_point_mul_public| function we // use window size 5 for the public point and 7 for the base point. Here it is // ok to use window of size 5 since the scalar is public and therefore the // function doesn't have to be constant-time. // // The precomputed table of base point multiples is generated by the code in // |make_tables.go| script. // Constants for scalar encoding in the scalar multiplication functions. #define P521_MUL_WSIZE (5) // window size w // Assert the window size is 5 because the pre-computed table in |p521_table.h| // is generated for window size 5. OPENSSL_STATIC_ASSERT(P521_MUL_WSIZE == 5, p521_scalar_mul_window_size_is_not_equal_to_five) #define P521_MUL_TWO_TO_WSIZE (1 << P521_MUL_WSIZE) // Number of |P521_MUL_WSIZE|-bit windows in a 521-bit value #define P521_MUL_NWINDOWS ((521 + P521_MUL_WSIZE - 1)/P521_MUL_WSIZE) // For the public point in |ec_GFp_nistp521_point_mul_public| function // we use window size equal to 5. #define P521_MUL_PUB_WSIZE (5) // We keep only odd multiples in tables, hence the table size is (2^w)/2 #define P521_MUL_TABLE_SIZE (P521_MUL_TWO_TO_WSIZE >> 1) #define P521_MUL_PUB_TABLE_SIZE (1 << (P521_MUL_PUB_WSIZE - 1)) // p521_select_point_affine selects the |idx|-th affine point from // the given precomputed table and copies it to |out| in constant-time. static void p521_select_point_affine(p521_felem out[2], size_t idx, const p521_felem table[][2], size_t table_size) { OPENSSL_memset(out, 0, sizeof(p521_felem) * 2); for (size_t i = 0; i < table_size; i++) { p521_limb_t mismatch = i ^ idx; p521_felem_cmovznz(out[0], mismatch, table[i][0], out[0]); p521_felem_cmovznz(out[1], mismatch, table[i][1], out[1]); } } // Multiplication of an arbitrary point by a scalar, r = [scalar]P. static void ec_GFp_nistp521_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, const EC_JACOBIAN *p, const EC_SCALAR *scalar) { p521_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}}; p521_from_generic(tmp[0], &p->X); p521_from_generic(tmp[1], &p->Y); p521_from_generic(tmp[2], &p->Z); ec_nistp_scalar_mul(p521_methods(), res[0], res[1], res[2], tmp[0], tmp[1], tmp[2], scalar); p521_to_generic(&r->X, res[0]); p521_to_generic(&r->Y, res[1]); p521_to_generic(&r->Z, res[2]); } // Include the precomputed table for the based point scalar multiplication. #include "p521_table.h" // Multiplication of the base point G of P-521 curve with the given scalar. // The product is computed with the Comb method using the precomputed table // |p521_g_pre_comp| from |p521_table.h| file and the regular-wNAF scalar // encoding. // // The |p521_g_pre_comp| table has 27 sub-tables each holding 16 points: // 0 : [1]G, [3]G, ..., [31]G // 1 : [1*2^20]G, [3*2^20]G, ..., [31*2^20]G // ... // i : [1*2^20i]G, [3*2^20i]G, ..., [31*2^20i]G // ... // 26 : [2^520]G, [3*2^520]G, ..., [31*2^520]G // Computing the negation of a point P = (x, y) is relatively easy: // -P = (x, -y). // So we may assume that for each sub-table we have 32 points instead of 16: // [\pm 1*2^20i]G, [\pm 3*2^20i]G, ..., [\pm 31*2^20i]G. // // The 521-bit |scalar| is recoded (regular-wNAF encoding) into 105 signed // digits, each of length 5 bits, as explained in the // |p521_felem_mul_scalar_rwnaf| function. Namely, // scalar' = s_0 + s_1*2^5 + s_2*2^10 + ... + s_104*2^520, // where digits s_i are in [\pm 1, \pm 3, ..., \pm 31]. Note that for an odd // scalar we have that scalar = scalar', while in the case of an even // scalar we have that scalar = scalar' - 1. // // To compute the required product, [scalar]G, we may do the following. // Group the recoded digits of the scalar in 4 groups: // | corresponding multiples in // digits | the recoded representation // ------------------------------------------------------------------------- // (0): {s_0, s_4, s_8, ..., s_100, s_104} | { 2^0, 2^20, ..., 2^500, 2^520} // (1): {s_1, s_5, s_9, ..., s_101} | { 2^5, 2^25, ..., 2^505} // (2): {s_2, s_6, s_10, ..., s_102} | {2^10, 2^30, ..., 2^510} // (3): {s_3, s_7, s_11, ..., s_103} | {2^15, 2^35, ..., 2^515} // corresponding sub-table lookup | { T0, T1, ..., T25, T26} // // The group (0) digits correspond precisely to the multiples of G that are // held in the 27 precomputed sub-tables, so we may simply read the appropriate // points from the sub-tables and sum them all up (negating if needed, i.e., if // a digit s_i is negative, we read the point corresponding to the abs(s_i) and // negate it before adding it to the sum). // The remaining three groups (1), (2), and (3), correspond to the multiples // of G from the sub-tables multiplied additionally by 2^5, 2^10, and 2^15, // respectively. Therefore, for these groups we may read the appropriate points // from the table, double them 5, 10, or 15 times, respectively, and add them // to the final result. // // To minimize the number of required doubling operations we process the digits // of the scalar from left to right. In other words, the algorithm is: // 1. Read the points corresponding to the group (3) digits from the table // and add them to an accumulator. // 2. Double the accumulator 5 times. // 3. Repeat steps 1. and 2. for groups (2) and (1), // and perform step 1. for group (0). // 4. If the scalar is even subtract G from the accumulator. // // Note: this function is constant-time. static void ec_GFp_nistp521_point_mul_base(const EC_GROUP *group, EC_JACOBIAN *r, const EC_SCALAR *scalar) { p521_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}}, ftmp; int16_t rnaf[P521_MUL_NWINDOWS] = {0}; // Recode the scalar. scalar_rwnaf(rnaf, P521_MUL_WSIZE, scalar, 521); // Process the 4 groups of digits starting from group (3) down to group (0). for (int i = 3; i >= 0; i--) { // Double |res| 5 times in each iteration, except in the first one. for (size_t j = 0; i != 3 && j < P521_MUL_WSIZE; j++) { p521_point_double(res[0], res[1], res[2], res[0], res[1], res[2]); } // Process the digits in the current group from the most to the least // significant one (this is a requirement to ensure that the case of point // doubling can't happen). // For group (3) we process digits s_103 to s_3, for group (2) s_102 to s_2, // group (1) s_101 to s_1, and for group (0) s_104 to s_0. const size_t start_idx = ((P521_MUL_NWINDOWS - i - 1)/4)*4 + i; for (int j = start_idx; j >= 0; j -= 4) { // For each digit |d| in the current group read the corresponding point // from the table and add it to |res|. If |d| is negative, negate // the point before adding it to |res|. int16_t d = rnaf[j]; // is_neg = (d < 0) ? 1 : 0 int16_t is_neg = (d >> 15) & 1; // d = abs(d) d = (d ^ -is_neg) + is_neg; int16_t idx = d >> 1; // Select the point to add, in constant time. p521_select_point_affine(tmp, idx, p521_g_pre_comp[j / 4], P521_MUL_TABLE_SIZE); // Negate y coordinate of the point tmp = (x, y); ftmp = -y. p521_felem_opp(ftmp, tmp[1]); // Conditionally select y or -y depending on the sign of the digit |d|. p521_felem_cmovznz(tmp[1], is_neg, tmp[1], ftmp); // Add the point to the accumulator |res|. // Note that the points in the pre-computed table are given with affine // coordinates. The point addition function computes a sum of two points, // either both given in projective, or one in projective and the other one // in affine coordinates. The |mixed| flag indicates the latter option, // in which case we set the third coordinate of the second point to one. p521_point_add(res[0], res[1], res[2], res[0], res[1], res[2], 1 /* mixed */, tmp[0], tmp[1], p521_felem_one); } } // Conditionally subtract G if the scalar is even, in constant-time. // First, compute |tmp| = |res| + (-G). p521_felem_copy(tmp[0], p521_g_pre_comp[0][0][0]); p521_felem_opp(tmp[1], p521_g_pre_comp[0][0][1]); p521_point_add(tmp[0], tmp[1], tmp[2], res[0], res[1], res[2], 1 /* mixed */, tmp[0], tmp[1], p521_felem_one); // Select |res| or |tmp| based on the |scalar| parity. p521_felem_cmovznz(res[0], scalar->words[0] & 1, tmp[0], res[0]); p521_felem_cmovznz(res[1], scalar->words[0] & 1, tmp[1], res[1]); p521_felem_cmovznz(res[2], scalar->words[0] & 1, tmp[2], res[2]); // Copy the result to the output. p521_to_generic(&r->X, res[0]); p521_to_generic(&r->Y, res[1]); p521_to_generic(&r->Z, res[2]); } // Computes [g_scalar]G + [p_scalar]P, where G is the base point of the P-521 // curve, and P is the given point |p|. // // Both scalar products are computed by the same "textbook" wNAF method, // with w = 5 for g_scalar and w = 5 for p_scalar. // For the base point G product we use the first sub-table of the precomputed // table |p521_g_pre_comp| from |p521_table.h| file, while for P we generate // |p_pre_comp| table on-the-fly. The tables hold the first 16 odd multiples // of G or P: // g_pre_comp = {[1]G, [3]G, ..., [31]G}, // p_pre_comp = {[1]P, [3]P, ..., [31]P}. // Computing the negation of a point P = (x, y) is relatively easy: // -P = (x, -y). // So we may assume that we also have the negatives of the points in the tables. // // The 521-bit scalars are recoded by the textbook wNAF method to 522 digits, // where a digit is either a zero or an odd integer in [-31, 31]. The method // guarantees that each non-zero digit is followed by at least four // zeroes. // // The result [g_scalar]G + [p_scalar]P is computed by the following algorithm: // 1. Initialize the accumulator with the point-at-infinity. // 2. For i starting from 521 down to 0: // 3. Double the accumulator (doubling can be skipped while the // accumulator is equal to the point-at-infinity). // 4. Read from |p_pre_comp| the point corresponding to the i-th digit of // p_scalar, negate it if the digit is negative, and add it to the // accumulator. // 5. Read from |g_pre_comp| the point corresponding to the i-th digit of // g_scalar, negate it if the digit is negative, and add it to the // accumulator. // // Note: this function is NOT constant-time. static void ec_GFp_nistp521_point_mul_public(const EC_GROUP *group, EC_JACOBIAN *r, const EC_SCALAR *g_scalar, const EC_JACOBIAN *p, const EC_SCALAR *p_scalar) { p521_felem res[3] = {{0}, {0}, {0}}, two_p[3] = {{0}, {0}, {0}}, ftmp; // Table of multiples of P: [2i + 1]P for i in [0, 15]. p521_felem p_pre_comp[P521_MUL_PUB_TABLE_SIZE][3]; // Set the first point in the table to P. p521_from_generic(p_pre_comp[0][0], &p->X); p521_from_generic(p_pre_comp[0][1], &p->Y); p521_from_generic(p_pre_comp[0][2], &p->Z); // Compute two_p = [2]P. p521_point_double(two_p[0], two_p[1], two_p[2], p_pre_comp[0][0], p_pre_comp[0][1], p_pre_comp[0][2]); // Generate the remaining 15 multiples of P. for (size_t i = 1; i < P521_MUL_PUB_TABLE_SIZE; i++) { p521_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2], two_p[0], two_p[1], two_p[2], 0 /* both Jacobian */, p_pre_comp[i - 1][0], p_pre_comp[i - 1][1], p_pre_comp[i - 1][2]); } // Recode the scalars. int8_t p_wnaf[522] = {0}, g_wnaf[522] = {0}; ec_compute_wNAF(group, p_wnaf, p_scalar, 521, P521_MUL_PUB_WSIZE); ec_compute_wNAF(group, g_wnaf, g_scalar, 521, P521_MUL_WSIZE); // In the beginning res is set to point-at-infinity, so we set the flag. int16_t res_is_inf = 1; int16_t d, is_neg, idx; for (int i = 521; i >= 0; i--) { // If |res| is point-at-infinity there is no point in doubling so skip it. if (!res_is_inf) { p521_point_double(res[0], res[1], res[2], res[0], res[1], res[2]); } // Process the p_scalar digit. d = p_wnaf[i]; if (d != 0) { is_neg = d < 0 ? 1 : 0; idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; if (res_is_inf) { // If |res| is point-at-infinity there is no need to add the new point, // we can simply copy it. p521_felem_copy(res[0], p_pre_comp[idx][0]); p521_felem_copy(res[1], p_pre_comp[idx][1]); p521_felem_copy(res[2], p_pre_comp[idx][2]); res_is_inf = 0; } else { // Otherwise, add to the accumulator either the point at position idx // in the table or its negation. if (is_neg) { p521_felem_opp(ftmp, p_pre_comp[idx][1]); } else { p521_felem_copy(ftmp, p_pre_comp[idx][1]); } p521_point_add(res[0], res[1], res[2], res[0], res[1], res[2], 0 /* both Jacobian */, p_pre_comp[idx][0], ftmp, p_pre_comp[idx][2]); } } // Process the g_scalar digit. d = g_wnaf[i]; if (d != 0) { is_neg = d < 0 ? 1 : 0; idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; if (res_is_inf) { // If |res| is point-at-infinity there is no need to add the new point, // we can simply copy it. p521_felem_copy(res[0], p521_g_pre_comp[0][idx][0]); p521_felem_copy(res[1], p521_g_pre_comp[0][idx][1]); p521_felem_copy(res[2], p521_felem_one); res_is_inf = 0; } else { // Otherwise, add to the accumulator either the point at position idx // in the table or its negation. if (is_neg) { p521_felem_opp(ftmp, p521_g_pre_comp[0][idx][1]); } else { p521_felem_copy(ftmp, p521_g_pre_comp[0][idx][1]); } // Add the point to the accumulator |res|. // Note that the points in the pre-computed table are given with affine // coordinates. The point addition function computes a sum of two points, // either both given in projective, or one in projective and one in // affine coordinates. The |mixed| flag indicates the latter option, // in which case we set the third coordinate of the second point to one. p521_point_add(res[0], res[1], res[2], res[0], res[1], res[2], 1 /* mixed */, p521_g_pre_comp[0][idx][0], ftmp, p521_felem_one); } } } // Copy the result to the output. p521_to_generic(&r->X, res[0]); p521_to_generic(&r->Y, res[1]); p521_to_generic(&r->Z, res[2]); } static void ec_GFp_nistp521_felem_mul(const EC_GROUP *group, EC_FELEM *r, const EC_FELEM *a, const EC_FELEM *b) { p521_felem felem1, felem2, felem3; p521_from_generic(felem1, a); p521_from_generic(felem2, b); p521_felem_mul(felem3, felem1, felem2); p521_to_generic(r, felem3); } static void ec_GFp_nistp521_felem_sqr(const EC_GROUP *group, EC_FELEM *r, const EC_FELEM *a) { p521_felem felem1, felem2; p521_from_generic(felem1, a); p521_felem_sqr(felem2, felem1); p521_to_generic(r, felem2); } DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp521_method) { out->point_get_affine_coordinates = ec_GFp_nistp521_point_get_affine_coordinates; out->add = ec_GFp_nistp521_add; out->dbl = ec_GFp_nistp521_dbl; out->mul = ec_GFp_nistp521_point_mul; out->mul_base = ec_GFp_nistp521_point_mul_base; out->mul_public = ec_GFp_nistp521_point_mul_public; out->felem_mul = ec_GFp_nistp521_felem_mul; out->felem_sqr = ec_GFp_nistp521_felem_sqr; out->felem_to_bytes = ec_GFp_simple_felem_to_bytes; out->felem_from_bytes = ec_GFp_simple_felem_from_bytes; out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery; out->scalar_to_montgomery_inv_vartime = ec_simple_scalar_to_montgomery_inv_vartime; out->cmp_x_coordinate = ec_GFp_simple_cmp_x_coordinate; } // ---------------------------------------------------------------------------- // Analysis of the doubling case occurrence in the Joye-Tunstall recoding: // see the analysis at the bottom of the |p384.c| file. #endif // !defined(OPENSSL_SMALL)