/*********************************************************************************** * FourQlib: a high-performance crypto library based on the elliptic curve FourQ * * Copyright (c) Microsoft Corporation. All rights reserved. * * Abstract: core GF(p^2) and ECC operations over GF(p^2) * * This code is based on the paper "FourQ: four-dimensional decompositions on a * Q-curve over the Mersenne prime" by Craig Costello and Patrick Longa, in Advances * in Cryptology - ASIACRYPT, 2015. * Preprint available at http://eprint.iacr.org/2015/565. ************************************************************************************/ #include "fourq/FourQ_internal.h" #include "fourq/FourQ_params.h" #include "fourq/FourQ_tables.h" #if defined(GENERIC_IMPLEMENTATION) #include "fourq/generic/fp.h" #elif (TARGET == TARGET_AMD64) #include "fourq/amd64/fp_x64.h" #elif (TARGET == TARGET_ARM64) #include "fourq/arm64/fp_arm64.h" #endif /***********************************************/ /************* GF(p^2) FUNCTIONS ***************/ void fp2copy1271(f2elm_t a, f2elm_t c) { // Copy of a GF(p^2) element, c = a fpcopy1271(a[0], c[0]); fpcopy1271(a[1], c[1]); } void fp2zero1271(f2elm_t a) { // Zeroing a GF(p^2) element, a = 0 fpzero1271(a[0]); fpzero1271(a[1]); } void fp2neg1271(f2elm_t a) { // GF(p^2) negation, a = -a in GF((2^127-1)^2) fpneg1271(a[0]); fpneg1271(a[1]); } void fp2sqr1271(f2elm_t a, f2elm_t c) { // GF(p^2) squaring, c = a^2 in GF((2^127-1)^2) #ifdef ASM_SUPPORT fp2sqr1271_a(a, c); #else felm_t t1, t2, t3; fpadd1271(a[0], a[1], t1); // t1 = a0+a1 fpsub1271(a[0], a[1], t2); // t2 = a0-a1 fpmul1271(a[0], a[1], t3); // t3 = a0*a1 fpmul1271(t1, t2, c[0]); // c0 = (a0+a1)(a0-a1) fpadd1271(t3, t3, c[1]); // c1 = 2a0*a1 #ifdef TEMP_ZEROING clear_words((void *)t1, sizeof(felm_t) / sizeof(unsigned int)); clear_words((void *)t2, sizeof(felm_t) / sizeof(unsigned int)); clear_words((void *)t3, sizeof(felm_t) / sizeof(unsigned int)); #endif #endif } void fp2mul1271(f2elm_t a, f2elm_t b, f2elm_t c) { // GF(p^2) multiplication, c = a*b in GF((2^127-1)^2) #if defined(ASM_SUPPORT) fp2mul1271_a(a, b, c); #else felm_t t1, t2, t3, t4; fpmul1271(a[0], b[0], t1); // t1 = a0*b0 fpmul1271(a[1], b[1], t2); // t2 = a1*b1 fpadd1271(a[0], a[1], t3); // t3 = a0+a1 fpadd1271(b[0], b[1], t4); // t4 = b0+b1 fpsub1271(t1, t2, c[0]); // c[0] = a0*b0 - a1*b1 fpmul1271(t3, t4, t3); // t3 = (a0+a1)*(b0+b1) fpsub1271(t3, t1, t3); // t3 = (a0+a1)*(b0+b1) - a0*b0 fpsub1271(t3, t2, c[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 #ifdef TEMP_ZEROING clear_words((void *)t1, sizeof(felm_t) / sizeof(unsigned int)); clear_words((void *)t2, sizeof(felm_t) / sizeof(unsigned int)); clear_words((void *)t3, sizeof(felm_t) / sizeof(unsigned int)); clear_words((void *)t4, sizeof(felm_t) / sizeof(unsigned int)); #endif #endif } void fp2add1271(f2elm_t a, f2elm_t b, f2elm_t c) { // GF(p^2) addition, c = a+b in GF((2^127-1)^2) fpadd1271(a[0], b[0], c[0]); fpadd1271(a[1], b[1], c[1]); } void fp2sub1271(f2elm_t a, f2elm_t b, f2elm_t c) { // GF(p^2) subtraction, c = a-b in GF((2^127-1)^2) fpsub1271(a[0], b[0], c[0]); fpsub1271(a[1], b[1], c[1]); } static void fp2addsub1271(f2elm_t a, f2elm_t b, f2elm_t c) { // GF(p^2) addition followed by subtraction, c = 2a-b in GF((2^127-1)^2) #ifdef ASM_SUPPORT fp2addsub1271_a(a, b, c); #else fp2add1271(a, a, a); fp2sub1271(a, b, c); #endif } void fp2inv1271(f2elm_t a) { // GF(p^2) inversion, a = (a0-i*a1)/(a0^2+a1^2) f2elm_t t1; fpsqr1271(a[0], t1[0]); // t10 = a0^2 fpsqr1271(a[1], t1[1]); // t11 = a1^2 fpadd1271(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2 fpinv1271(t1[0]); // t10 = (a0^2+a1^2)^-1 fpneg1271(a[1]); // a = a0-i*a1 fpmul1271(a[0], t1[0], a[0]); fpmul1271(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 #ifdef TEMP_ZEROING clear_words((void *)t1, sizeof(f2elm_t) / sizeof(unsigned int)); #endif } void clear_words(void *mem, unsigned int nwords) { // Clear integer-size digits from memory. "nwords" indicates the number of integer digits to be // zeroed. This function uses the volatile type qualifier to inform the compiler not to optimize // out the memory clearing. It has been tested with MSVS 2013 and GNU GCC 4.6.3, 4.7.3, 4.8.2 // and 4.8.4. Users are responsible for verifying correctness with different compilers. See // "Compliant Solution (C99)" at // https://www.securecoding.cert.org/confluence/display/c/MSC06-C.+Beware+of+compiler+optimizations unsigned int i; volatile unsigned int *v = mem; for (i = 0; i < nwords; i++) v[i] = 0; } /***********************************************/ /********** CURVE/SCALAR FUNCTIONS ***********/ void eccset(point_t P) { // Set generator // Output: P = (x,y) fp2copy1271((felm_t *)&GENERATOR_x, P->x); // X1 fp2copy1271((felm_t *)&GENERATOR_y, P->y); // Y1 } void eccnorm(point_extproj_t P, point_t Q) { // Normalize a projective point (X1:Y1:Z1), including full reduction // Input: P = (X1:Y1:Z1) in twisted Edwards coordinates // Output: Q = (X1/Z1,Y1/Z1), corresponding to (X1:Y1:Z1:T1) in extended twisted Edwards // coordinates fp2inv1271(P->z); // Z1 = Z1^-1 fp2mul1271(P->x, P->z, Q->x); // X1 = X1/Z1 fp2mul1271(P->y, P->z, Q->y); // Y1 = Y1/Z1 mod1271(Q->x[0]); mod1271(Q->x[1]); mod1271(Q->y[0]); mod1271(Q->y[1]); } void R1_to_R2(point_extproj_t P, point_extproj_precomp_t Q) { // Conversion from representation (X,Y,Z,Ta,Tb) to (X+Y,Y-X,2Z,2dT), where T = Ta*Tb // Input: P = (X1,Y1,Z1,Ta,Tb), where T1 = Ta*Tb, corresponding to (X1:Y1:Z1:T1) in extended // twisted Edwards coordinates Output: Q = (X1+Y1,Y1-X1,2Z1,2dT1) corresponding to (X1:Y1:Z1:T1) // in extended twisted Edwards coordinates fp2add1271(P->ta, P->ta, Q->t2); // T = 2*Ta fp2add1271(P->x, P->y, Q->xy); // QX = X+Y fp2sub1271(P->y, P->x, Q->yx); // QY = Y-X fp2mul1271(Q->t2, P->tb, Q->t2); // T = 2*T fp2add1271(P->z, P->z, Q->z2); // QZ = 2*Z fp2mul1271(Q->t2, (felm_t *)&PARAMETER_d, Q->t2); // QT = 2d*T } void R1_to_R3(point_extproj_t P, point_extproj_precomp_t Q) { // Conversion from representation (X,Y,Z,Ta,Tb) to (X+Y,Y-X,Z,T), where T = Ta*Tb // Input: P = (X1,Y1,Z1,Ta,Tb), where T1 = Ta*Tb, corresponding to (X1:Y1:Z1:T1) in extended // twisted Edwards coordinates Output: Q = (X1+Y1,Y1-X1,Z1,T1) corresponding to (X1:Y1:Z1:T1) in // extended twisted Edwards coordinates fp2add1271(P->x, P->y, Q->xy); // XQ = (X1+Y1) fp2sub1271(P->y, P->x, Q->yx); // YQ = (Y1-X1) fp2mul1271(P->ta, P->tb, Q->t2); // TQ = T1 fp2copy1271(P->z, Q->z2); // ZQ = Z1 } void R2_to_R4(point_extproj_precomp_t P, point_extproj_t Q) { // Conversion from representation (X+Y,Y-X,2Z,2dT) to (2X,2Y,2Z,2dT) // Input: P = (X1+Y1,Y1-X1,2Z1,2dT1) corresponding to (X1:Y1:Z1:T1) in extended twisted Edwards // coordinates Output: Q = (2X1,2Y1,2Z1) corresponding to (X1:Y1:Z1) in twisted Edwards // coordinates fp2sub1271(P->xy, P->yx, Q->x); // XQ = 2*X1 fp2add1271(P->xy, P->yx, Q->y); // YQ = 2*Y1 fp2copy1271(P->z2, Q->z); // ZQ = 2*Z1 } void eccdouble(point_extproj_t P) { // Point doubling 2P // Input: P = (X1:Y1:Z1) in twisted Edwards coordinates // Output: 2P = (Xfinal,Yfinal,Zfinal,Tafinal,Tbfinal), where Tfinal = Tafinal*Tbfinal, // corresponding to (Xfinal:Yfinal:Zfinal:Tfinal) in extended twisted Edwards coordinates f2elm_t t1, t2; fp2sqr1271(P->x, t1); // t1 = X1^2 fp2sqr1271(P->y, t2); // t2 = Y1^2 fp2add1271(P->x, P->y, P->x); // t3 = X1+Y1 fp2add1271(t1, t2, P->tb); // Tbfinal = X1^2+Y1^2 fp2sub1271(t2, t1, t1); // t1 = Y1^2-X1^2 fp2sqr1271(P->x, P->ta); // Ta = (X1+Y1)^2 fp2sqr1271(P->z, t2); // t2 = Z1^2 fp2sub1271(P->ta, P->tb, P->ta); // Tafinal = 2X1*Y1 = (X1+Y1)^2-(X1^2+Y1^2) fp2addsub1271(t2, t1, t2); // t2 = 2Z1^2-(Y1^2-X1^2) fp2mul1271(t1, P->tb, P->y); // Yfinal = (X1^2+Y1^2)(Y1^2-X1^2) fp2mul1271(t2, P->ta, P->x); // Xfinal = 2X1*Y1*[2Z1^2-(Y1^2-X1^2)] fp2mul1271(t1, t2, P->z); // Zfinal = (Y1^2-X1^2)[2Z1^2-(Y1^2-X1^2)] #ifdef TEMP_ZEROING clear_words((void *)t1, sizeof(f2elm_t) / sizeof(unsigned int)); clear_words((void *)t2, sizeof(f2elm_t) / sizeof(unsigned int)); #endif } void eccadd_core(point_extproj_precomp_t P, point_extproj_precomp_t Q, point_extproj_t R) { // Basic point addition R = P+Q or R = P+P // Inputs: P = (X1+Y1,Y1-X1,2Z1,2dT1) corresponding to (X1:Y1:Z1:T1) in extended twisted Edwards // coordinates // Q = (X2+Y2,Y2-X2,Z2,T2) corresponding to (X2:Y2:Z2:T2) in extended twisted Edwards // coordinates // Output: R = (Xfinal,Yfinal,Zfinal,Tafinal,Tbfinal), where Tfinal = Tafinal*Tbfinal, // corresponding to (Xfinal:Yfinal:Zfinal:Tfinal) in extended twisted Edwards coordinates f2elm_t t1, t2; fp2mul1271(P->t2, Q->t2, R->z); // Z = 2dT1*T2 fp2mul1271(P->z2, Q->z2, t1); // t1 = 2Z1*Z2 fp2mul1271(P->xy, Q->xy, R->x); // X = (X1+Y1)(X2+Y2) fp2mul1271(P->yx, Q->yx, R->y); // Y = (Y1-X1)(Y2-X2) fp2sub1271(t1, R->z, t2); // t2 = theta fp2add1271(t1, R->z, t1); // t1 = alpha fp2sub1271(R->x, R->y, R->tb); // Tbfinal = beta fp2add1271(R->x, R->y, R->ta); // Tafinal = omega fp2mul1271(R->tb, t2, R->x); // Xfinal = beta*theta fp2mul1271(t1, t2, R->z); // Zfinal = theta*alpha fp2mul1271(R->ta, t1, R->y); // Yfinal = alpha*omega #ifdef TEMP_ZEROING clear_words((void *)t1, sizeof(f2elm_t) / sizeof(unsigned int)); clear_words((void *)t2, sizeof(f2elm_t) / sizeof(unsigned int)); #endif } void eccadd(point_extproj_precomp_t Q, point_extproj_t P) { // Complete point addition P = P+Q or P = P+P // Inputs: P = (X1,Y1,Z1,Ta,Tb), where T1 = Ta*Tb, corresponding to (X1:Y1:Z1:T1) in extended // twisted Edwards coordinates // Q = (X2+Y2,Y2-X2,2Z2,2dT2) corresponding to (X2:Y2:Z2:T2) in extended twisted Edwards // coordinates // Output: P = (Xfinal,Yfinal,Zfinal,Tafinal,Tbfinal), where Tfinal = Tafinal*Tbfinal, // corresponding to (Xfinal:Yfinal:Zfinal:Tfinal) in extended twisted Edwards coordinates point_extproj_precomp_t R; R1_to_R3(P, R); // R = (X1+Y1,Y1-Z1,Z1,T1) eccadd_core(Q, R, P); // P = (X2+Y2,Y2-X2,2Z2,2dT2) + (X1+Y1,Y1-Z1,Z1,T1) #ifdef TEMP_ZEROING clear_words((void *)R, sizeof(point_extproj_precomp_t) / sizeof(unsigned int)); #endif } void point_setup(point_t P, point_extproj_t Q) { // Point conversion to representation (X,Y,Z,Ta,Tb) // Input: P = (x,y) in affine coordinates // Output: P = (X,Y,1,Ta,Tb), where Ta=X, Tb=Y and T=Ta*Tb, corresponding to (X:Y:Z:T) in extended // twisted Edwards coordinates fp2copy1271(P->x, Q->x); fp2copy1271(P->y, Q->y); fp2copy1271(Q->x, Q->ta); // Ta = X1 fp2copy1271(Q->y, Q->tb); // Tb = Y1 fp2zero1271(Q->z); Q->z[0][0] = 1; // Z1 = 1 } bool ecc_point_validate(point_extproj_t P) { // Point validation: check if point lies on the curve // Input: P = (x,y) in affine coordinates, where x, y in [0, 2^127-1]. // Output: TRUE (1) if point lies on the curve E: -x^2+y^2-1-dx^2*y^2 = 0, FALSE (0) otherwise. // SECURITY NOTE: this function does not run in constant time (input point P is assumed to be // public). f2elm_t t1, t2, t3; fp2sqr1271(P->y, t1); fp2sqr1271(P->x, t2); fp2sub1271(t1, t2, t3); // -x^2 + y^2 fp2mul1271(t1, t2, t1); // x^2*y^2 fp2mul1271((felm_t *)&PARAMETER_d, t1, t2); // dx^2*y^2 fp2zero1271(t1); t1[0][0] = 1; // t1 = 1 fp2add1271(t2, t1, t2); // 1 + dx^2*y^2 fp2sub1271(t3, t2, t1); // -x^2 + y^2 - 1 - dx^2*y^2 #if defined(GENERIC_IMPLEMENTATION) { unsigned int i, j; mod1271(t1[0]); mod1271(t1[1]); for (i = 0; i < 2; i++) { for (j = 0; j < NWORDS_FIELD; j++) { if (t1[i][j] != 0) return false; } } return true; } #else return ( (is_digit_zero_ct(t1[0][0] | t1[0][1]) || is_digit_zero_ct((t1[0][0] + 1) | (t1[0][1] + 1))) & (is_digit_zero_ct(t1[1][0] | t1[1][1]) || is_digit_zero_ct((t1[1][0] + 1) | (t1[1][1] + 1)))); #endif } static void R5_to_R1(point_precomp_t P, point_extproj_t Q) { // Conversion from representation (x+y,y-x,2dt) to (X,Y,Z,Ta,Tb) // Input: P = (x1+y1,y1-x1,2dt1) corresponding to (X1:Y1:Z1:T1) in extended twisted Edwards // coordinates, where Z1=1 Output: Q = (x1,y1,z1,x1,y1), where z1=1, corresponding to // (X1:Y1:Z1:T1) in extended twisted Edwards coordinates fp2sub1271(P->xy, P->yx, Q->x); // 2*x1 fp2add1271(P->xy, P->yx, Q->y); // 2*y1 fp2div1271(Q->x); // XQ = x1 fp2div1271(Q->y); // YQ = y1 fp2zero1271(Q->z); Q->z[0][0] = 1; // ZQ = 1 fp2copy1271(Q->x, Q->ta); // TaQ = x1 fp2copy1271(Q->y, Q->tb); // TbQ = y1 } static void eccmadd(point_precomp_t Q, point_extproj_t P) { // Mixed point addition P = P+Q or P = P+P // Inputs: P = (X1,Y1,Z1,Ta,Tb), where T1 = Ta*Tb, corresponding to (X1:Y1:Z1:T1) in extended // twisted Edwards coordinates // Q = (x2+y2,y2-x2,2dt2) corresponding to (X2:Y2:Z2:T2) in extended twisted Edwards // coordinates, where Z2=1 // Output: P = (Xfinal,Yfinal,Zfinal,Tafinal,Tbfinal), where Tfinal = Tafinal*Tbfinal, // corresponding to (Xfinal:Yfinal:Zfinal:Tfinal) in extended twisted Edwards coordinates f2elm_t t1, t2; fp2mul1271(P->ta, P->tb, P->ta); // Ta = T1 fp2add1271(P->z, P->z, t1); // t1 = 2Z1 fp2mul1271(P->ta, Q->t2, P->ta); // Ta = 2dT1*t2 fp2add1271(P->x, P->y, P->z); // Z = (X1+Y1) fp2sub1271(P->y, P->x, P->tb); // Tb = (Y1-X1) fp2sub1271(t1, P->ta, t2); // t2 = theta fp2add1271(t1, P->ta, t1); // t1 = alpha fp2mul1271(Q->xy, P->z, P->ta); // Ta = (X1+Y1)(x2+y2) fp2mul1271(Q->yx, P->tb, P->x); // X = (Y1-X1)(y2-x2) fp2mul1271(t1, t2, P->z); // Zfinal = theta*alpha fp2sub1271(P->ta, P->x, P->tb); // Tbfinal = beta fp2add1271(P->ta, P->x, P->ta); // Tafinal = omega fp2mul1271(P->tb, t2, P->x); // Xfinal = beta*theta fp2mul1271(P->ta, t1, P->y); // Yfinal = alpha*omega #ifdef TEMP_ZEROING clear_words((void *)t1, sizeof(f2elm_t) / sizeof(unsigned int)); clear_words((void *)t2, sizeof(f2elm_t) / sizeof(unsigned int)); #endif } void eccmadd_ni(point_precomp_t Q, point_extproj_t P) { eccmadd(Q, P); } bool ecc_mul_fixed(digit_t *k, point_t Q) { // Fixed-base scalar multiplication Q = k*G, where G is the generator. FIXED_BASE_TABLE stores // v*2^(w-1) = 80 multiples of G. Inputs: scalar "k" in [0, 2^256-1]. Output: Q = k*G in affine // coordinates (x,y). The function is based on the modified LSB-set comb method, which converts // the scalar to an odd signed representation with (bitlength(order)+w*v) digits. unsigned int j, w = W_FIXEDBASE, v = V_FIXEDBASE, d = D_FIXEDBASE, e = E_FIXEDBASE; unsigned int digit = 0, digits[NBITS_ORDER_PLUS_ONE + (W_FIXEDBASE * V_FIXEDBASE) - 1] = { 0 }; digit_t temp[NWORDS_ORDER]; point_extproj_t R; point_precomp_t S; int i, ii; modulo_order(k, temp); // temp = k mod (order) conversion_to_odd(temp, temp); // Converting scalar to odd using the prime subgroup order mLSB_set_recode((uint64_t *)temp, digits); // Scalar recoding // Extracting initial digit digit = digits[w * d - 1]; for (i = (int)((w - 1) * d - 1); i >= (int)(2 * d - 1); i = i - d) { digit = 2 * digit + digits[i]; } // Initialize R = (x+y,y-x,2dt) with a point from the table table_lookup_fixed_base( ((point_precomp_t *)&FIXED_BASE_TABLE) + (v - 1) * (1 << (w - 1)), S, digit, digits[d - 1]); R5_to_R1(S, R); // Converting to representation (X:Y:1:Ta:Tb) for (j = 0; j < (v - 1); j++) { digit = digits[w * d - (j + 1) * e - 1]; for (i = (int)((w - 1) * d - (j + 1) * e - 1); i >= (int)(2 * d - (j + 1) * e - 1); i = i - d) { digit = 2 * digit + digits[i]; } // Extract point in (x+y,y-x,2dt) representation table_lookup_fixed_base( ((point_precomp_t *)&FIXED_BASE_TABLE) + (v - j - 2) * (1 << (w - 1)), S, digit, digits[d - (j + 1) * e - 1]); eccmadd( S, R); // R = R+S using representations (X,Y,Z,Ta,Tb) <- (X,Y,Z,Ta,Tb) + (x+y,y-x,2dt) } for (ii = (e - 2); ii >= 0; ii--) { eccdouble(R); // R = 2*R using representations (X,Y,Z,Ta,Tb) <- 2*(X,Y,Z) for (j = 0; j < v; j++) { digit = digits[w * d - j * e + ii - e]; for (i = (int)((w - 1) * d - j * e + ii - e); i >= (int)(2 * d - j * e + ii - e); i = i - d) { digit = 2 * digit + digits[i]; } // Extract point in (x+y,y-x,2dt) representation table_lookup_fixed_base( ((point_precomp_t *)&FIXED_BASE_TABLE) + (v - j - 1) * (1 << (w - 1)), S, digit, digits[d - j * e + ii - e]); eccmadd( S, R); // R = R+S using representations (X,Y,Z,Ta,Tb) <- (X,Y,Z,Ta,Tb) + (x+y,y-x,2dt) } } eccnorm(R, Q); // Conversion to affine coordinates (x,y) and modular correction. #ifdef TEMP_ZEROING clear_words((void *)digits, NBITS_ORDER_PLUS_ONE + (W_FIXEDBASE * V_FIXEDBASE) - 1); clear_words((void *)S, sizeof(point_precomp_t) / sizeof(unsigned int)); #endif return true; } void mLSB_set_recode(uint64_t *scalar, unsigned int *digits) { // Computes the modified LSB-set representation of a scalar // Inputs: scalar in [0, order-1], where the order of FourQ's subgroup is 246 bits. // Output: digits, where the first "d" values (from index 0 to (d-1)) store the signs for the // recoded values using the convention: -1 (negative), 0 (positive), and // the remaining values (from index d to (l-1)) store the recoded values in mLSB-set // representation, excluding their sign, where l = d*w and d = // ceil(bitlength(order)/(w*v))*v. The values v and w are fixed and must be in the range // [1, 10] (see FourQ.h); they determine the size of the precomputed table // "FIXED_BASE_TABLE" used by ecc_mul_fixed(). unsigned int i, j, d = D_FIXEDBASE, l = L_FIXEDBASE; uint64_t temp, carry; digits[d - 1] = 0; // Shift scalar to the right by 1 for (j = 0; j < (NWORDS64_ORDER - 1); j++) { SHIFTR(scalar[j + 1], scalar[j], 1, scalar[j], RADIX64); } scalar[NWORDS64_ORDER - 1] >>= 1; for (i = 0; i < (d - 1); i++) { digits[i] = (unsigned int)((scalar[0] & 1) - 1); // Convention for the "sign" row: // if scalar_(i+1) = 0 then digit_i = -1 // (negative), else if scalar_(i+1) = 1 // then digit_i = 0 (positive) // Shift scalar to the right by 1 for (j = 0; j < (NWORDS64_ORDER - 1); j++) { SHIFTR(scalar[j + 1], scalar[j], 1, scalar[j], RADIX64); } scalar[NWORDS64_ORDER - 1] >>= 1; } for (i = d; i < l; i++) { digits[i] = (unsigned int)(scalar[0] & 1); // digits_i = k mod 2. Sign is determined by the // "sign" row // Shift scalar to the right by 1 for (j = 0; j < (NWORDS64_ORDER - 1); j++) { SHIFTR(scalar[j + 1], scalar[j], 1, scalar[j], RADIX64); } scalar[NWORDS64_ORDER - 1] >>= 1; temp = (0 - digits[i - (i / d) * d]) & digits[i]; // if (digits_i=0 \/ 1) then temp = 0, else if (digits_i=-1) then temp = 1 // floor(scalar/2) + temp scalar[0] = scalar[0] + temp; carry = (temp & (uint64_t)is_digit_zero_ct((digit_t)scalar[0])); // carry = (scalar[0] < temp); for (j = 1; j < NWORDS64_ORDER; j++) { scalar[j] = scalar[j] + carry; carry = (carry & (uint64_t)is_digit_zero_ct((digit_t)scalar[j])); // carry = (scalar[j] < temp); } } return; } #if (USE_ENDO == true) static void eccneg_extproj_precomp(point_extproj_precomp_t P, point_extproj_precomp_t Q) { // Point negation // Input : point P in coordinates (X+Y,Y-X,2Z,2dT) // Output: point Q = -P = (Y-X,X+Y,2Z,-2dT) fp2copy1271(P->t2, Q->t2); fp2copy1271(P->xy, Q->yx); fp2copy1271(P->yx, Q->xy); fp2copy1271(P->z2, Q->z2); fp2neg1271(Q->t2); } static void eccneg_precomp(point_precomp_t P, point_precomp_t Q) { // Point negation // Input : point P in coordinates (x+y,y-x,2dt) // Output: point Q = -P = (y-x,x+y,-2dt) fp2copy1271(P->t2, Q->t2); fp2copy1271(P->xy, Q->yx); fp2copy1271(P->yx, Q->xy); fp2neg1271(Q->t2); } #endif bool ecc_mul_double(digit_t *k, point_t Q, digit_t *l, point_t R) { // Double scalar multiplication R = k*G + l*Q, where the G is the generator. Uses // DOUBLE_SCALAR_TABLE, which contains multiples of G, Phi(G), Psi(G) and Phi(Psi(G)). Inputs: // point Q in affine coordinates, // scalars "k" and "l" in [0, 2^256-1]. // Output: R = k*G + l*Q in affine coordinates (x,y). // The function uses wNAF with interleaving. // SECURITY NOTE: this function is intended for a non-constant-time operation such as signature // verification. #if (USE_ENDO == true) unsigned int position; int i, digits_k1[65] = { 0 }, digits_k2[65] = { 0 }, digits_k3[65] = { 0 }, digits_k4[65] = { 0 }; int digits_l1[65] = { 0 }, digits_l2[65] = { 0 }, digits_l3[65] = { 0 }, digits_l4[65] = { 0 }; point_precomp_t V; point_extproj_t Q1, Q2, Q3, Q4, T; point_extproj_precomp_t U, Q_table1[NPOINTS_DOUBLEMUL_WQ], Q_table2[NPOINTS_DOUBLEMUL_WQ], Q_table3[NPOINTS_DOUBLEMUL_WQ], Q_table4[NPOINTS_DOUBLEMUL_WQ]; uint64_t k_scalars[4], l_scalars[4]; point_setup(Q, Q1); // Convert to representation (X,Y,1,Ta,Tb) if (ecc_point_validate(Q1) == false) { // Check if point lies on the curve return false; } // Computing endomorphisms over point Q ecccopy(Q1, Q2); ecc_phi(Q2); ecccopy(Q1, Q3); ecc_psi(Q3); ecccopy(Q2, Q4); ecc_psi(Q4); decompose((uint64_t *)k, k_scalars); // Scalar decomposition decompose((uint64_t *)l, l_scalars); wNAF_recode(k_scalars[0], WP_DOUBLEBASE, digits_k1); // Scalar recoding wNAF_recode(k_scalars[1], WP_DOUBLEBASE, digits_k2); wNAF_recode(k_scalars[2], WP_DOUBLEBASE, digits_k3); wNAF_recode(k_scalars[3], WP_DOUBLEBASE, digits_k4); wNAF_recode(l_scalars[0], WQ_DOUBLEBASE, digits_l1); wNAF_recode(l_scalars[1], WQ_DOUBLEBASE, digits_l2); wNAF_recode(l_scalars[2], WQ_DOUBLEBASE, digits_l3); wNAF_recode(l_scalars[3], WQ_DOUBLEBASE, digits_l4); ecc_precomp_double(Q1, Q_table1, NPOINTS_DOUBLEMUL_WQ); // Precomputation ecc_precomp_double(Q2, Q_table2, NPOINTS_DOUBLEMUL_WQ); ecc_precomp_double(Q3, Q_table3, NPOINTS_DOUBLEMUL_WQ); ecc_precomp_double(Q4, Q_table4, NPOINTS_DOUBLEMUL_WQ); fp2zero1271(T->x); // Initialize T as the neutral point (0:1:1) fp2zero1271(T->y); T->y[0][0] = 1; fp2zero1271(T->z); T->z[0][0] = 1; for (i = 64; i >= 0; i--) { eccdouble(T); // Double (X_T,Y_T,Z_T,Ta_T,Tb_T) = 2(X_T,Y_T,Z_T,Ta_T,Tb_T) if (digits_l1[i] < 0) { position = (-digits_l1[i]) / 2; eccneg_extproj_precomp( Q_table1[position], U); // Load and negate U = (X_U,Y_U,Z_U,Td_U) <- // -(X+Y,Y-X,2Z,2dT) from a point in the precomputed table eccadd(U, T); // T = T+U = (X_T,Y_T,Z_T,Ta_T,Tb_T) = (X_T,Y_T,Z_T,Ta_T,Tb_T) + // (X_U,Y_U,Z_U,Td_U) } else if (digits_l1[i] > 0) { position = (digits_l1[i]) / 2; // Take U = (X_U,Y_U,Z_U,Td_U) <- (X+Y,Y-X,2Z,2dT) from a // point in the precomputed table eccadd(Q_table1[position], T); // T = T+U = (X_T,Y_T,Z_T,Ta_T,Tb_T) = // (X_T,Y_T,Z_T,Ta_T,Tb_T) + (X_U,Y_U,Z_U,Td_U) } if (digits_l2[i] < 0) { position = (-digits_l2[i]) / 2; eccneg_extproj_precomp(Q_table2[position], U); eccadd(U, T); } else if (digits_l2[i] > 0) { position = (digits_l2[i]) / 2; eccadd(Q_table2[position], T); } if (digits_l3[i] < 0) { position = (-digits_l3[i]) / 2; eccneg_extproj_precomp(Q_table3[position], U); eccadd(U, T); } else if (digits_l3[i] > 0) { position = (digits_l3[i]) / 2; eccadd(Q_table3[position], T); } if (digits_l4[i] < 0) { position = (-digits_l4[i]) / 2; eccneg_extproj_precomp(Q_table4[position], U); eccadd(U, T); } else if (digits_l4[i] > 0) { position = (digits_l4[i]) / 2; eccadd(Q_table4[position], T); } if (digits_k1[i] < 0) { position = (-digits_k1[i]) / 2; eccneg_precomp( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[position], V); // Load and negate V = (X_V,Y_V,Z_V,Td_V) <- -(x+y,y-x,2dt) from a point in the // precomputed table eccmadd(V, T); // T = T+V = (X_T,Y_T,Z_T,Ta_T,Tb_T) = (X_T,Y_T,Z_T,Ta_T,Tb_T) + // (X_V,Y_V,Z_V,Td_V) } else if (digits_k1[i] > 0) { position = (digits_k1[i]) / 2; // Take V = (X_V,Y_V,Z_V,Td_V) <- (x+y,y-x,2dt) from a // point in the precomputed table eccmadd( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[position], T); // T = T+V = (X_T,Y_T,Z_T,Ta_T,Tb_T) = (X_T,Y_T,Z_T,Ta_T,Tb_T) + // (X_V,Y_V,Z_V,Td_V) } if (digits_k2[i] < 0) { position = (-digits_k2[i]) / 2; eccneg_precomp( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[NPOINTS_DOUBLEMUL_WP + position], V); eccmadd(V, T); } else if (digits_k2[i] > 0) { position = (digits_k2[i]) / 2; eccmadd(((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[NPOINTS_DOUBLEMUL_WP + position], T); } if (digits_k3[i] < 0) { position = (-digits_k3[i]) / 2; eccneg_precomp( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[2 * NPOINTS_DOUBLEMUL_WP + position], V); eccmadd(V, T); } else if (digits_k3[i] > 0) { position = (digits_k3[i]) / 2; eccmadd( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[2 * NPOINTS_DOUBLEMUL_WP + position], T); } if (digits_k4[i] < 0) { position = (-digits_k4[i]) / 2; eccneg_precomp( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[3 * NPOINTS_DOUBLEMUL_WP + position], V); eccmadd(V, T); } else if (digits_k4[i] > 0) { position = (digits_k4[i]) / 2; eccmadd( ((point_precomp_t *)&DOUBLE_SCALAR_TABLE)[3 * NPOINTS_DOUBLEMUL_WP + position], T); } } #else point_t A; point_extproj_t T; point_extproj_precomp_t S; if (ecc_mul(Q, l, A, false) == false) { return false; } point_setup(A, T); R1_to_R2(T, S); ecc_mul_fixed(k, A); point_setup(A, T); eccadd(S, T); #endif eccnorm(T, R); // Output R = (x,y) return true; } void ecc_precomp_double(point_extproj_t P, point_extproj_precomp_t *Table, unsigned int npoints) { // Generation of the precomputation table used internally by the double scalar multiplication // function ecc_mul_double(). Inputs: point P in representation (X,Y,Z,Ta,Tb), // Table with storage for npoints, // number of points "npoints". // Output: Table containing multiples of the base point P using representation (X+Y,Y-X,2Z,2dT). point_extproj_t Q; point_extproj_precomp_t PP; unsigned int i; R1_to_R2(P, Table[0]); // Precomputed point Table[0] = P in coordinates (X+Y,Y-X,2Z,2dT) eccdouble(P); // A = 2*P in (X,Y,Z,Ta,Tb) R1_to_R3(P, PP); // Converting from (X,Y,Z,Ta,Tb) to (X+Y,Y-X,Z,T) for (i = 1; i < npoints; i++) { eccadd_core(Table[i - 1], PP, Q); // Table[i] = Table[i-1]+2P using the representations // (X,Y,Z,Ta,Tb) <- (X+Y,Y-X,2Z,2dT) + (X+Y,Y-X,Z,T) R1_to_R2(Q, Table[i]); // Converting from (X,Y,Z,Ta,Tb) to (X+Y,Y-X,2Z,2dT) } return; } void wNAF_recode(uint64_t scalar, unsigned int w, int *digits) { // Computes wNAF recoding of a scalar, where digits are in set {0,+-1,+-3,...,+-(2^(w-1)-1)} unsigned int i; int digit, index = 0; int val1 = (int)(1 << (w - 1)) - 1; // 2^(w-1) - 1 int val2 = (int)(1 << w); // 2^w; uint64_t k = scalar, mask = (uint64_t)val2 - 1; // 2^w - 1 while (k != 0) { digit = (int)(k & 1); if (digit == 0) { k >>= 1; // Shift scalar to the right by 1 digits[index] = 0; } else { digit = (int)(k & mask); k >>= w; // Shift scalar to the right by w if (digit > val1) { digit -= val2; } if (digit < 0) { // scalar + 1 k += 1; } digits[index] = digit; if (k != 0) { // Check if scalar != 0 for (i = 0; i < (w - 1); i++) { index++; digits[index] = 0; } } } index++; } return; }