#ifndef PQCLEAN_FALCON1024_CLEAN_FPR_H #define PQCLEAN_FALCON1024_CLEAN_FPR_H /* * Floating-point operations. * * ==========================(LICENSE BEGIN)============================ * * Copyright (c) 2017-2019 Falcon Project * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * ===========================(LICENSE END)============================= * * @author Thomas Pornin */ /* ====================================================================== */ /* * Custom floating-point implementation with integer arithmetics. We * use IEEE-754 "binary64" format, with some simplifications: * * - Top bit is s = 1 for negative, 0 for positive. * * - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). * * - Mantissa m uses the 52 low bits. * * Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) * i.e. the mantissa really is a 53-bit number (less than 2.0, but not * less than 1.0), but the top bit (equal to 1 by definition) is omitted * in the encoding. * * In IEEE-754, there are some special values: * * - If e = 2047, then the value is either an infinite (m = 0) or * a NaN (m != 0). * * - If e = 0, then the value is either a zero (m = 0) or a subnormal, * aka "denormalized number" (m != 0). * * Of these, we only need the zeros. The caller is responsible for not * providing operands that would lead to infinites, NaNs or subnormals. * If inputs are such that values go out of range, then indeterminate * values are returned (it would still be deterministic, but no specific * value may be relied upon). * * At the C level, the three parts are stored in a 64-bit unsigned * word. * * One may note that a property of the IEEE-754 format is that order * is preserved for positive values: if two positive floating-point * values x and y are such that x < y, then their respective encodings * as _signed_ 64-bit integers i64(x) and i64(y) will be such that * i64(x) < i64(y). For negative values, order is reversed: if x < 0, * y < 0, and x < y, then ia64(x) > ia64(y). * * IMPORTANT ASSUMPTIONS: * ====================== * * For proper computations, and constant-time behaviour, we assume the * following: * * - 32x32->64 multiplication (unsigned) has an execution time that * is independent of its operands. This is true of most modern * x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ * and M3 (in the M0 and M0+, this is done in software, so it depends * on that routine), and the PowerPC cores from the G3/G4 lines. * For more info, see: https://www.bearssl.org/ctmul.html * * - Left-shifts and right-shifts of 32-bit values have an execution * time which does not depend on the shifted value nor on the * shift count. An historical exception is the Pentium IV, but most * modern CPU have barrel shifters. Some small microcontrollers * might have varying-time shifts (not the ARM Cortex M*, though). * * - Right-shift of a signed negative value performs a sign extension. * As per the C standard, this operation returns an * implementation-defined result (this is NOT an "undefined * behaviour"). On most/all systems, an arithmetic shift is * performed, because this is what makes most sense. */ /* * Normally we should declare the 'fpr' type to be a struct or union * around the internal 64-bit value; however, we want to use the * direct 64-bit integer type to enable a lighter call convention on * ARM platforms. This means that direct (invalid) use of operators * such as '*' or '+' will not be caught by the compiler. We rely on * the "normal" (non-emulated) code to detect such instances. */ typedef uint64_t fpr; /* * For computations, we split values into an integral mantissa in the * 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is * "sticky" (it is set to 1 if any of the bits below it is 1); when * re-encoding, the low two bits are dropped, but may induce an * increment in the value for proper rounding. */ /* * Right-shift a 64-bit unsigned value by a possibly secret shift count. * We assumed that the underlying architecture had a barrel shifter for * 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will * typically invoke a software routine that is not necessarily * constant-time; hence the function below. * * Shift count n MUST be in the 0..63 range. */ #define fpr_ursh PQCLEAN_FALCON1024_CLEAN_fpr_ursh uint64_t fpr_ursh(uint64_t x, int n); /* * Right-shift a 64-bit signed value by a possibly secret shift count * (see fpr_ursh() for the rationale). * * Shift count n MUST be in the 0..63 range. */ #define fpr_irsh PQCLEAN_FALCON1024_CLEAN_fpr_irsh int64_t fpr_irsh(int64_t x, int n); /* * Left-shift a 64-bit unsigned value by a possibly secret shift count * (see fpr_ursh() for the rationale). * * Shift count n MUST be in the 0..63 range. */ #define fpr_ulsh PQCLEAN_FALCON1024_CLEAN_fpr_ulsh uint64_t fpr_ulsh(uint64_t x, int n); /* * Expectations: * s = 0 or 1 * exponent e is "arbitrary" and unbiased * 2^54 <= m < 2^55 * Numerical value is (-1)^2 * m * 2^e * * Exponents which are too low lead to value zero. If the exponent is * too large, the returned value is indeterminate. * * If m = 0, then a zero is returned (using the provided sign). * If e < -1076, then a zero is returned (regardless of the value of m). * If e >= -1076 and e != 0, m must be within the expected range * (2^54 to 2^55-1). */ #define FPR PQCLEAN_FALCON1024_CLEAN_FPR fpr FPR(int s, int e, uint64_t m); #define fpr_scaled PQCLEAN_FALCON1024_CLEAN_fpr_scaled fpr fpr_scaled(int64_t i, int sc); #define fpr_of PQCLEAN_FALCON1024_CLEAN_fpr_of fpr fpr_of(int64_t i); static const fpr fpr_q = 4667981563525332992; static const fpr fpr_inverse_of_q = 4545632735260551042; static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306; static const fpr fpr_inv_sigma = 4573359825155195350; static const fpr fpr_sigma_min_9 = 4608495221497168882; static const fpr fpr_sigma_min_10 = 4608586345619182117; static const fpr fpr_log2 = 4604418534313441775; static const fpr fpr_inv_log2 = 4609176140021203710; static const fpr fpr_bnorm_max = 4670353323383631276; static const fpr fpr_zero = 0; static const fpr fpr_one = 4607182418800017408; static const fpr fpr_two = 4611686018427387904; static const fpr fpr_onehalf = 4602678819172646912; static const fpr fpr_invsqrt2 = 4604544271217802189; static const fpr fpr_invsqrt8 = 4600040671590431693; static const fpr fpr_ptwo31 = 4746794007248502784; static const fpr fpr_ptwo31m1 = 4746794007244308480; static const fpr fpr_mtwo31m1 = 13970166044099084288U; static const fpr fpr_ptwo63m1 = 4890909195324358656; static const fpr fpr_mtwo63m1 = 14114281232179134464U; static const fpr fpr_ptwo63 = 4890909195324358656; #define fpr_rint PQCLEAN_FALCON1024_CLEAN_fpr_rint int64_t fpr_rint(fpr x); #define fpr_floor PQCLEAN_FALCON1024_CLEAN_fpr_floor int64_t fpr_floor(fpr x); #define fpr_trunc PQCLEAN_FALCON1024_CLEAN_fpr_trunc int64_t fpr_trunc(fpr x); #define fpr_add PQCLEAN_FALCON1024_CLEAN_fpr_add fpr fpr_add(fpr x, fpr y); #define fpr_sub PQCLEAN_FALCON1024_CLEAN_fpr_sub fpr fpr_sub(fpr x, fpr y); #define fpr_neg PQCLEAN_FALCON1024_CLEAN_fpr_neg fpr fpr_neg(fpr x); #define fpr_half PQCLEAN_FALCON1024_CLEAN_fpr_half fpr fpr_half(fpr x); #define fpr_double PQCLEAN_FALCON1024_CLEAN_fpr_double fpr fpr_double(fpr x); #define fpr_mul PQCLEAN_FALCON1024_CLEAN_fpr_mul fpr fpr_mul(fpr x, fpr y); #define fpr_sqr PQCLEAN_FALCON1024_CLEAN_fpr_sqr fpr fpr_sqr(fpr x); #define fpr_div PQCLEAN_FALCON1024_CLEAN_fpr_div fpr fpr_div(fpr x, fpr y); #define fpr_inv PQCLEAN_FALCON1024_CLEAN_fpr_inv fpr fpr_inv(fpr x); #define fpr_sqrt PQCLEAN_FALCON1024_CLEAN_fpr_sqrt fpr fpr_sqrt(fpr x); #define fpr_lt PQCLEAN_FALCON1024_CLEAN_fpr_lt int fpr_lt(fpr x, fpr y); /* * Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 * bits or so. */ #define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63 uint64_t fpr_expm_p63(fpr x, fpr ccs); #define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab extern const fpr fpr_gm_tab[]; #define fpr_p2_tab PQCLEAN_FALCON1024_CLEAN_fpr_p2_tab extern const fpr fpr_p2_tab[]; /* ====================================================================== */ #endif