#! Multiply-accumulate with carry-in on 32-bit limbs.
#!
#! Given [b, c, a, carry] on stack top, following function computes
#!
#!  tmp = a + (b * c) + carry
#!  hi = tmp >> 32
#!  lo = tmp & 0xffff_ffff
#!  return (hi, lo)
#!
#! At end of execution of this function, stack top should look like [hi, lo]
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L41-L46
proc.mac
    u32overflowing_madd         # a + b * c as [hi', lo', carry, ...]

    movdn.2
    u32overflowing_add          # lo = lo' + carry, overflow flag on top

    movup.2
    add                         # hi = hi' + overflow flag
end

#! Subtract-with-borrow on 32-bit limbs.
#!
#! Given [a, b, borrow] on stack top, following function computes
#!
#!  tmp = a - (b + borrow)
#!  hi = tmp >> 32
#!  lo = tmp & 0xffff_ffff
#!  return (hi, lo)
#!
#! At end of execution of this function, stack top should look like [hi, lo],
#! where hi is the underflow flag produced by `u32overflowing_sub`.
#!
#! NOTE(review): `b + borrow` is formed with the plain `add`, so it is not reduced
#! to 32 bits before the subtraction; confirm callers never pass b = 2^32 - 1 together
#! with borrow = 1.
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L49-L55
proc.sbb
    movdn.2                     # [b, borrow, a, ...]
    add                         # [b + borrow, a, ...]
    u32overflowing_sub          # a - (b + borrow)
end

#! Given a secp256k1 base field element in radix-2^32 representation ( Montgomery form )
#! and 32-bit unsigned integer, this routine computes a 288-bit number.
#!
#! Input via stack is expected in this form
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, b, ...] | a[0..8] -> 256-bit number, b = 32-bit number
#!
#! Computed output looks like below, on stack
#!
#! [carry, b7, b6, b5, b4, b3, b2, b1, b0, ...]
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L65-L83
proc.u256xu32
    movup.8                                         # bring b to stack top

    # each step computes a_i * b + previous high word; low words pile up below the top
    push.0 dup.1 movup.3 u32overflowing_madd        # a0 * b + 0
    dup.2 movup.4 u32overflowing_madd               # a1
    dup.3 movup.5 u32overflowing_madd               # a2
    dup.4 movup.6 u32overflowing_madd               # a3
    dup.5 movup.7 u32overflowing_madd               # a4
    dup.6 movup.8 u32overflowing_madd               # a5
    dup.7 movup.9 u32overflowing_madd               # a6

    movup.8 movup.9 u32overflowing_madd             # a7; b is consumed here instead of duplicated
end

#! Given a 288-bit number and 256-bit number on stack ( in order ), this routine
#! computes a 288-bit number, by adding the 256-bit number to other operand
#!
#! Expected stack state during routine invocation
#!
#! [carry, b7, b6, b5, b4, b3, b2, b1, b0, c0, c1, c2, c3, c4, c5, c6, c7, ...]
#!
#! While after execution of this routine, stack should look like
#!
#! [d0, d1, d2, d3, d4, d5, d6, d7, carry, ...]
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L85-L98
proc.u288_add_u256
    swapw movupw.2                                  # [b0, c0, c1, c2, b4, b3, b2, b1, carry, b7, ...]

    u32overflowing_add                              # d0 = b0 + c0
    movup.2 movup.7 u32overflowing_add3             # d1 = b1 + c1 + carry
    movup.3 movup.6 u32overflowing_add3             # d2

    movup.4 movup.5 movupw.2                        # fetch next operand word
    movup.2 movup.4 movup.6 u32overflowing_add3     # d3
    movup.5 movup.5 u32overflowing_add3             # d4

    movup.3 movup.4 movupw.2                        # fetch next operand word
    movup.2 movup.4 movup.6 u32overflowing_add3     # d5
    movup.5 movup.5 u32overflowing_add3             # d6
    movup.10 movup.5 u32overflowing_add3            # d7

    movup.4 add                                     # fold last carry into the 9th limb

    # reverse [carry, d7, ..., d0] into [d0, ..., d7, carry]
    swap movup.2 movup.3 movup.4 movup.5 movup.6 movup.7 movup.8
end

#! Given [c0, c1, c2, c3, c4, c5, c6, c7, c8, pc, ...] on stack top,
#! this function attempts to reduce 288-bit number to a 256-bit number
#! along with carry, using montgomery reduction method. The modulo to which this
#! reduction is performed is secp256k1 base field prime.
#!
#! Find secp256k1 base field prime https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_consts.py#L19-L21
#!
#! In stack top content c[0..9] i.e. first 9 elements, holding 288-bit
#! number. Stack element `pc` ( at stack[9] ) is previous reduction round's
#! carry ( for first reduction round, it'll be set to 0 ).
#!
#! After finishing execution of this function, stack top should look like
#!
#! [c0, c1, c2, c3, c4, c5, c6, c7, pc, ...] | pc = next round's carry
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L118-L126
proc.u288_reduce
    # 3525653809 * 977 = 1 ( mod 2^32 ) and p = -977 ( mod 2^32 ), so the constant is -p^-1 mod 2^32
    dup push.3525653809 u32wrapping_mul             # q at stack top

    # c + q * p, limb by limb; pushed constants are limbs of p, least significant first
    push.0 movup.2 push.4294966319 dup.3 exec.mac
    swap drop                                       # discard the lowest limb, keep only its carry

    movup.2 push.4294967294 dup.3 exec.mac
    movup.3 push.4294967295 dup.4 exec.mac
    movup.4 push.4294967295 dup.5 exec.mac
    movup.5 push.4294967295 dup.6 exec.mac
    movup.6 push.4294967295 dup.7 exec.mac
    movup.7 dup.7 push.4294967295 exec.mac
    movup.7 movup.8 swap push.4294967295 exec.mac   # last limb; q is consumed here

    movup.9 movup.9 u32overflowing_add3             # c8 + pc + carry -> ( next pc, top limb )

    # reverse the 9 produced values so the least significant limb is on top
    swap movup.2 movup.3 movup.4 movup.5 movup.6 movup.7 movup.8
end

#! Given two 256-bit numbers ( elements belonging to secp256k1 base field ) on stack,
#! where each number is represented in radix-2^32 form ( i.e. each number having eight
#! 32-bit limbs ), following function computes modular multiplication of those two
#! operands, computing 256-bit result, which belongs to secp256k1 base field.
#!
#! Stack expected as below, holding input
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7, ...] | a[0..8], b[0..8] are 256-bit numbers
#!
#! After finishing execution of this function, stack should look like
#!
#! [c0, c1, c2, c3, c4, c5, c6, c7, ...] | c[0..8] is a 256-bit number
#!
#! Note, for computing modular multiplication of a[0..8] & b[0..8],
#! school book multiplication equipped with Montgomery reduction technique
#! is used, which is why a[0..8], b[0..8] are expected to be in Montgomery form,
#! while computed c[0..8] will also be in Montgomery form.
#!
#! NOTE(review): the final carry fold drops the high word of `977 * pc + c0` and adds
#! `pc` to c1 with the plain `add`; a carry out of these two limbs is not propagated.
#! Confirm this matches the reference implementation linked below.
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L101-L222
export.mul.2
    # cache a[0..4] in local 0 and a[4..8] in local 1, leaving the stack untouched
    loc_storew.0 swapw loc_storew.1 swapw

    # round 0 : a * b0, nothing to accumulate yet
    exec.u256xu32
    swap movup.2 movup.3 movup.4 movup.5 movup.6 movup.7 movup.8
    push.0 movdn.9                                  # previous round's carry = 0
    exec.u288_reduce

    # rounds 1..7 : bring next b limb to top, reload a, multiply, accumulate, reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce
    movup.9 push.0.0.0.0 loc_loadw.1 push.0.0.0.0 loc_loadw.0 exec.u256xu32 exec.u288_add_u256 exec.u288_reduce

    # fold the last carry back in : c1 += pc, c0 = lo( 977 * pc + c0 )
    movup.8 movup.2 dup.1 add
    movup.2 movup.2 push.977
    u32overflowing_madd drop
end

#! Just a wrapper function for ease of squaring an element of secp256k1 base field.
#!
#! Expected stack state
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, ...] | a[0..8] is a 256-bit number
#!
#! Final stack state
#!
#! [b0, b1, b2, b3, b4, b5, b6, b7, ...] | b[0..8] is a 256-bit number s.t. b = a * a
proc.sqr
    dupw.1 dupw.1               # duplicate both words of the operand
    exec.mul
end

#! Given two 256-bit numbers ( elements belonging to secp256k1 base field ) on stack,
#! where each number is represented in radix-2^32 form ( i.e. each number having eight
#! 32-bit limbs ), following function computes modular addition of those two operands,
#! in secp256k1 base field.
#!
#! Stack expected as below, holding input
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7, ...] | a[0..8], b[0..8] are 256-bit numbers
#!
#! After finishing execution of this function, stack should look like
#!
#! [c0, c1, c2, c3, c4, c5, c6, c7, ...] | c[0..8] is a 256-bit number
#!
#! NOTE(review): the final carry fold drops the high word of `977 * carry + c0` and adds
#! `carry` to c1 with the plain `add`; a carry out of these two limbs is not propagated.
#! Confirm this matches the reference implementation linked below.
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field.py#L57-L76
export.add
    movupw.2                                        # [b0, b1, b2, b3, a0, ..., a7, b4, ..., b7, ...]

    # limb-wise a_i + b_i + carry; results pile up below the running carry
    push.0 movup.5 u32overflowing_add3
    movup.2 movup.5 u32overflowing_add3
    movup.3 movup.5 u32overflowing_add3
    movup.4 movup.5 u32overflowing_add3
    movup.5 movup.9 u32overflowing_add3
    movup.6 movup.9 u32overflowing_add3
    movup.7 movup.9 u32overflowing_add3
    movup.8 movup.9 u32overflowing_add3             # [carry, c7, c6, ..., c0, ...]

    # fold the carry back in : c0 = lo( 977 * carry + c0 ), c1 += carry
    movup.8 dup.1 push.977 u32overflowing_madd drop
    swap movup.8 add

    # restore limb order, stack holds [c1, c0, c7, ..., c2] at this point
    movup.2 movup.3 movup.4 movup.5 movup.6 movup.7
    movup.6 movup.7
end

#! Given a secp256k1 base field element ( say a ) on stack, represented in Montgomery form
#! ( i.e. number having eight 32-bit limbs ), following function negates it to
#! field element a' | a' + a = 0
#!
#! Stack expected as below, holding input
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, ...] | a[0..8] is a secp256k1 base field element
#!
#! After finishing execution of this function, stack should look like
#!
#! [c0, c1, c2, c3, c4, c5, c6, c7, ...] | c[0..8] is a secp256k1 base field element
#!
#! The result is computed as p - a, limb by limb with borrow, so for a = 0 the produced
#! limbs are those of p itself; the final borrow is discarded.
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field.py#L78-L96
export.neg
    # pushed constants are limbs of p, least significant first
    push.0 swap push.4294966319 exec.sbb            # initial borrow = 0
    movup.2 push.4294967294 exec.sbb
    movup.3 push.4294967295 exec.sbb
    movup.4 push.4294967295 exec.sbb
    movup.5 push.4294967295 exec.sbb
    movup.6 push.4294967295 exec.sbb
    movup.7 push.4294967295 exec.sbb
    movup.8 push.4294967295 exec.sbb

    drop                                            # discard final borrow

    # reverse [c7, ..., c0] into [c0, ..., c7]
    swap movup.2 movup.3 movup.4 movup.5 movup.6 movup.7
end

#! Given two secp256k1 base field elements, say a, b, ( represented in Montgomery form,
#! each number having eight 32-bit limbs ) on stack, following function computes modular
#! subtraction of those two operands c = a + (-b) = a - b
#!
#! Stack expected as below, holding input
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3, b4, b5, b6, b7, ...] | a[0..8], b[0..8] are secp256k1 base field elements
#!
#! After finishing execution of this function, stack should look like
#!
#! [c0, c1, c2, c3, c4, c5, c6, c7, ...] | c[0..8] is a secp256k1 base field element
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field.py#L98-L102
export.sub
    movupw.3 movupw.3           # bring b[0..8] above a[0..8], limb order preserved

    exec.neg
    exec.add
end

#! Given a 256-bit number on stack, represented in radix-2^32 form i.e. eight 32-bit limbs,
#! this routine computes Montgomery representation of provided radix-2^32 number.
#!
#! Stack expected in form
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, ...]
#!
#! Final stack should look like
#!
#! [a0', a1', a2', a3', a4', a5', a6', a7', ...]
#!
#! See section 2.2 of https://eprint.iacr.org/2017/1057.pdf
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L225-L232
#! for implementation
export.to_mont
    push.0.0.0.0
    push.0.1.1954.954529 # pushed R2's radix-2^32 form;
                         # see https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_consts.py#L31

    exec.mul
end

#! Given a 256-bit number on stack, represented in Montgomery form i.e. eight 32-bit limbs,
#! this routine computes radix-2^32 representation of provided u256 number.
#!
#! Stack expected as
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, ...]
#!
#! Final stack should look like
#!
#! [a0', a1', a2', a3', a4', a5', a6', a7', ...]
#!
#! See section 2.2 of https://eprint.iacr.org/2017/1057.pdf
#! See https://github.com/itzmeanjan/secp256k1/blob/6e5e654823a073add7d62b21ed88e9de9bb06869/field/base_field_utils.py#L235-L241
#! for implementation
export.from_mont
    push.0.0.0.0
    push.0.0.0.1 # pushed 1's radix-2^32 form;

    exec.mul
end

#! Given an element ( say a ) of secp256k1 base field, this routine computes multiplicative
#! inverse ( say a' ) of that element s.t. a * a' = 1 ( mod p ) | p = secp256k1 base field prime
#!
#! Expected stack state
#!
#! [a0, a1, a2, a3, a4, a5, a6, a7, ...] | a[0..8] is a 256-bit number
#!
#! Final stack state
#!
#! [b0, b1, b2, b3, b4, b5, b6, b7, ...] | b[0..8] is a 256-bit number s.t. b = a^-1 ( mod p )
#!
#! Note, both input and output stays in Montgomery form. If 0 is input operand, then multiplicative
#! inverse can't be computed, which is why output result is also 0.
#!
#! The inverse is obtained by square-and-multiply over a 256-bit exponent whose limbs are
#! pushed below ( lowest limb is 2 less than the lowest limb of p ), scanned from the most
#! significant bit. Locals 0, 1 hold the running result; locals 2, 3 hold the base.
#!
#! See https://github.com/itzmeanjan/secp256k1/blob/37b339db3e03d24c2977399eb8896ef515ebb09b/field/base_field.py#L114-L132
export.inv.4
    # cache result initial value ( = 1, in Montgomery form )
    push.0.0.0.0.0.0.1.977
    loc_storew.0 dropw
    loc_storew.1 dropw

    # cache base
    loc_storew.2 dropw
    loc_storew.3 dropw

    # exponent limbs; most significant limb ends up on stack top
    push.4294966317.4294967294.4294967295.4294967295.4294967295.4294967295.4294967295.4294967295

    repeat.8
        repeat.32
            # result = result * result
            push.0.0.0.0.0.0.0.0
            loc_loadw.1 swapw loc_loadw.0
            exec.sqr
            loc_storew.0 dropw
            loc_storew.1 dropw

            # test the most significant bit of the current exponent limb
            dup u32shr.31

            if.true
                # result = result * base
                push.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0

                loc_loadw.3 swapw loc_loadw.2
                swapdw
                loc_loadw.1 swapw loc_loadw.0

                exec.mul

                loc_storew.0 dropw
                loc_storew.1 dropw
            end

            u32shl.1            # expose the next bit of this limb
        end

        drop                    # limb fully consumed
    end

    # load the final result back onto the stack
    push.0.0.0.0.0.0.0.0
    loc_loadw.1 swapw loc_loadw.0
end