// // This file is part of // // CTBignum // // C++ Library for Compile-Time and Run-Time Multi-Precision and Modular Arithmetic // // // This file is distributed under the Apache License, Version 2.0. See the LICENSE // file for details. #ifndef CT_MONTGOMERY_HPP #define CT_MONTGOMERY_HPP #include #include #include #include #include #include #include #include // std::size_t #include namespace cbn { template constexpr auto montgomery_reduction(big_int A, std::integer_sequence) { // Montgomery reduction with compile-time modulus // // inputs: // A (2n limbs) number to be reduced // m ( n limbs) modulus // // output: // T R^-1 mod m, where R = (2^64)^n // using detail::skip; using detail::first; using detail::unary_encoding; using detail::pad; using detail::limbwise_shift_left; using std::integer_sequence; constexpr auto m = big_int{Modulus...}; constexpr auto inv = mod_inv(integer_sequence{}, integer_sequence{}); // m^{-1} mod 2^64 constexpr T mprime = -inv[0]; auto accum = pad<1>(A); for (auto i = 0; i < N2; ++i) { auto prod = short_mul(m, accum[i] * mprime); auto prod2 = limbwise_shift_left(prod, i); accum = add_ignore_carry(accum, prod2); } auto result = skip(accum); auto padded_mod = pad<1>(m); if (result >= padded_mod) result = subtract_ignore_carry(result, padded_mod); return first(result); } template CBN_ALWAYS_INLINE constexpr auto montgomery_mul(big_int x, big_int y, std::integer_sequence) { // Montgomery multiplication with compile-time modulus using detail::skip; using detail::first; using detail::pad; using std::integer_sequence; using TT = typename dbl_bitlen::type; constexpr auto m = big_int{Modulus...}; constexpr auto inv = mod_inv(integer_sequence{}, integer_sequence{}); // m^{-1} mod 2^64 constexpr T mprime = -inv[0]; big_int A{}; for (auto i = 0; i < N; ++i) { T u_i = (A[0] + x[i] * y[0]) * mprime; // A += x[i] * y + u_i * m followed by a 1 limb-shift to the right T k = 0; T k2 = 0; TT z = static_cast(y[0]) * static_cast(x[i]) + A[0] + k; TT z2 = static_cast(m[0]) * static_cast(u_i) + static_cast(z) + k2; k = z >> std::numeric_limits::digits; k2 = z2 >> std::numeric_limits::digits; for (auto j = 1; j < N; ++j) { TT t = static_cast(y[j]) * static_cast(x[i]) + A[j] + k; TT t2 = static_cast(m[j]) * static_cast(u_i) + static_cast(t) + k2; A[j-1] = t2; k = t >> std::numeric_limits::digits; k2 = t2 >> std::numeric_limits::digits; } TT tmp = static_cast(A[N]) + k + k2; A[N-1] = tmp; A[N] = tmp >> std::numeric_limits::digits; } auto padded_mod = pad<1>(m); if (A >= padded_mod) A = subtract_ignore_carry(A, padded_mod); return first(A); } namespace { // Define a template that can be used to prevent type deduction of a parameter. template struct Identity { typedef T type; }; template using Identity_t = typename Identity::type; } // anonymous namespace // Runtime-parameter variants /// Note: the type of the last parameter is not deduced from itself, but from /// the other parameters instead. template constexpr auto montgomery_reduction(big_int A, big_int m, Identity_t mprime) { // Montgomery reduction with runtime parameters // // inputs: // A (2n limbs) number to be reduced // m ( n limbs) modulus // mprime (uint64_t) mprime = - m^{-1} mod 2^64 // // output: // T R^-1 mod m, where R = (2^64)^n // using detail::skip; using detail::first; using detail::unary_encoding; using detail::pad; using detail::limbwise_shift_left; auto accum = pad<1>(A); for (auto i = 0; i < N2; ++i) { auto prod = short_mul(m, accum[i] * mprime); auto prod2 = limbwise_shift_left(prod, i); accum = add_ignore_carry(accum, prod2); } auto result = skip(accum); auto padded_mod = pad<1>(m); if (result >= padded_mod) result = subtract_ignore_carry(result, padded_mod); return first(result); } /// Note: the type of the last parameter is not deduced from itself, but from /// the other parameters instead. template CBN_ALWAYS_INLINE constexpr auto montgomery_mul(big_int x, big_int y, big_int m, Identity_t mprime) { // Montgomery multiplication with runtime parameters using detail::skip; using detail::first; using detail::pad; using TT = typename dbl_bitlen::type; big_int A{}; for (auto i = 0; i < N; ++i) { T u_i = (A[0] + x[i] * y[0]) * mprime; // A += x[i] * y + u_i * m followed by a 1 limb-shift to the right T k = 0; T k2 = 0; TT z = static_cast(y[0]) * static_cast(x[i]) + A[0] + k; TT z2 = static_cast(m[0]) * static_cast(u_i) + static_cast(z) + k2; k = z >> std::numeric_limits::digits; k2 = z2 >> std::numeric_limits::digits; for (auto j = 1; j < N; ++j) { TT t = static_cast(y[j]) * static_cast(x[i]) + A[j] + k; TT t2 = static_cast(m[j]) * static_cast(u_i) + static_cast(t) + k2; A[j-1] = t2; k = t >> std::numeric_limits::digits; k2 = t2 >> std::numeric_limits::digits; } TT tmp = static_cast(A[N]) + k + k2; A[N-1] = tmp; A[N] = tmp >> std::numeric_limits::digits; } auto padded_mod = pad<1>(m); if (A >= padded_mod) A = subtract_ignore_carry(A, padded_mod); return first(A); } namespace detail { template CBN_ALWAYS_INLINE constexpr T inverse_mod(T a) { // inverse modulo 2^(limb-width) (needed for the montgomery representation) T x = ((a << 1 ^ a) & 4) << 1 ^ a; x += x - a * x * x; if constexpr (std::numeric_limits::digits >= 16) x += x - a * x * x; if constexpr (std::numeric_limits::digits >= 32) x += x - a * x * x; if constexpr (std::numeric_limits::digits >= 64) x += x - a * x * x; return x; } } } #endif