#! Performs addition of two unsigned 256 bit integers discarding the overflow.
#! The input values are assumed to be represented using 32 bit limbs, but this is not checked.
export.add_unsafe
    swapw.3
    movup.3 movup.7 u32overflowing_add
    movup.4 movup.7 u32overflowing_add3
    movup.4 movup.6 u32overflowing_add3
    movup.4 movup.5 u32overflowing_add3
    movdn.12
    swapw.2
    movup.12
    movup.4 movup.8 u32overflowing_add3
    movup.4 movup.7 u32overflowing_add3
    movup.4 movup.6 u32overflowing_add3
    movup.4 movup.5 u32overflowing_add3
    drop
end

#! Performs subtraction of two unsigned 256 bit integers discarding the underflow.
#! The input values are assumed to be represented using 32 bit limbs, but this is not checked.
export.sub_unsafe
    swapw.3
    movup.3 movup.7 u32overflowing_sub
    movup.7 u32overflowing_add
    movup.5 movup.2 u32overflowing_sub
    movup.2 add
    movup.6 u32overflowing_add
    movup.5 movup.2 u32overflowing_sub
    movup.2 add
    movup.5 u32overflowing_add
    movup.5 movup.2 u32overflowing_sub
    movup.2 add
    movdn.12
    swapw.2
    movup.12
    movup.4 u32overflowing_add
    movup.8 movup.2 u32overflowing_sub
    movup.2 add
    movup.4 u32overflowing_add
    movup.7 movup.2 u32overflowing_sub
    movup.2 add
    movup.4 u32overflowing_add
    movup.6 movup.2 u32overflowing_sub
    movup.2 add
    movup.5 movup.5 movup.2 u32overflowing_add
    drop
    u32overflowing_sub
    drop
end

#! Computes the bitwise AND of two 256 bit values represented using 32 bit limbs.
export.and
    swapw.3
    movup.3 movup.7 u32and
    movup.3 movup.6 u32and
    movup.3 movup.5 u32and
    movup.3 movup.4 u32and
    swapw.2
    movup.3 movup.7 u32and
    movup.3 movup.6 u32and
    movup.3 movup.5 u32and
    movup.3 movup.4 u32and
end

#! Computes the bitwise OR of two 256 bit values represented using 32 bit limbs.
export.or
    swapw.3
    movup.3 movup.7 u32or
    movup.3 movup.6 u32or
    movup.3 movup.5 u32or
    movup.3 movup.4 u32or
    swapw.2
    movup.3 movup.7 u32or
    movup.3 movup.6 u32or
    movup.3 movup.5 u32or
    movup.3 movup.4 u32or
end

#! Computes the bitwise XOR of two 256 bit values represented using 32 bit limbs.
export.xor
    swapw.3
    movup.3 movup.7 u32xor
    movup.3 movup.6 u32xor
    movup.3 movup.5 u32xor
    movup.3 movup.4 u32xor
    swapw.2
    movup.3 movup.7 u32xor
    movup.3 movup.6 u32xor
    movup.3 movup.5 u32xor
    movup.3 movup.4 u32xor
end

#! Returns 1 if the 256 bit value is zero and 0 otherwise.
#! The input value is assumed to be represented using 32 bit limbs, but this is not checked.
export.iszero_unsafe
    eq.0
    repeat.7
        swap
        eq.0
        and
    end
end

#! Returns 1 if the two 256 bit values are equal and 0 otherwise.
#! The input values are assumed to be represented using 32 bit limbs, but this is not checked.
export.eq_unsafe
    swapw.3
    eqw
    movdn.8
    dropw dropw
    movdn.8
    eqw
    movdn.8
    dropw dropw
    and
end

# ===== MULTIPLICATION ============================================================================

# Helper for mul_unsafe: a single 32 bit multiply-accumulate step with carry propagation.
proc.mulstep
    movdn.2
    u32overflowing_madd
    movdn.2
    u32overflowing_add
    movup.2
    add
end

# Helper for mul_unsafe: applies mulstep to one limb of the multiplier against four limbs of the multiplicand.
proc.mulstep4
    movup.12
    dup.1 movup.10
    push.0 # start k at 0
    exec.mulstep
    swap movdn.9

    dup.1 movup.9 movup.13 swap.3
    exec.mulstep
    swap movdn.8

    dup.1 movup.8 movup.12 swap.3
    exec.mulstep
    swap movdn.7

    dup.1 movup.7 movup.11 swap.3
    exec.mulstep
    swap movdn.6
end

#! Performs multiplication of two unsigned 256 bit integers discarding the overflow.
#! The input values are assumed to be represented using 32 bit limbs, but this is not checked.
#! Stack transition looks as follows:
#! [b7, b6, b5, b4, b3, b2, b1, b0, a7, a6, a5, a4, a3, a2, a1, a0, ...] -> [c7, c6, c5, c4, c3, c2, c1, c0, ...]
#! where c = (a * b) % 2^256, and a0, b0, and c0 are the least significant 32-bit limbs of a, b, and c respectively.
export.mul_unsafe.6
    # Memory storing setup
    loc_storew.0
    dropw
    # b[5-8] at 0
    loc_storew.1
    # b[0-4] at 1
    push.0 dropw
    # b[0] at top of stack, followed by a[0-7]
    movdn.8
    loc_storew.2
    # a[0-4] at 2
    swapw
    loc_storew.3
    # a[5-8] at 3
    padw loc_storew.4 loc_storew.5
    # p at 4 and 5

    # b[0]
    dropw swapw
    push.0.0.0.0 loc_loadw.4
    movdnw.2
    movup.12

    exec.mulstep4

    movdn.9 movdn.9
    swapw
    loc_storew.4 dropw
    push.0.0.0.0 loc_loadw.5
    swapw
    movup.9 movup.9

    dup.1 movup.6 movup.10 swap.3
    exec.mulstep
    swap movdn.5

    dup.1 movup.5 movup.9 swap.3
    exec.mulstep
    swap movdn.4

    dup.1 movup.4 movup.8 swap.3
    exec.mulstep
    swap movdn.3

    swap movup.2 movup.6 swap.3
    exec.mulstep

    drop
    loc_storew.5 dropw

    # b[1]
    push.0.0.0.0 loc_loadw.4
    push.0.0.0.0 loc_loadw.5
    movup.7
    dropw
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.2 # load the xs
    push.0.0.0.0 loc_loadw.1
    movup.2 movdn.3
    push.0 dropw # only need b[1]

    exec.mulstep4

    movdn.9 movdn.9
    swapw
    movdn.3
    push.0.0.0.0 loc_loadw.4
    push.0 dropw # only need p[0]
    movdn.3
    # save p[0-3] to memory, not needed any more
    loc_storew.4 dropw

    push.0.0.0.0 loc_loadw.5
    movup.3
    drop
    swapw
    movup.9 movup.9

    dup.1 movup.6 movup.9 swap.3
    exec.mulstep
    swap movdn.7

    dup.1 movup.5 movup.7 swap.3
    exec.mulstep
    swap movdn.5

    swap movup.3 movup.4 swap.3
    exec.mulstep

    drop
    swap
    drop
    loc_storew.5 dropw

    # b[2]
    push.0.0.0.0 loc_loadw.4
    push.0.0.0.0 loc_loadw.5
    movup.7 movup.7
    dropw
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.2 # load the xs
    push.0.0.0.0 loc_loadw.1
    swap movdn.3
    push.0 dropw # only need b[1]

    exec.mulstep4

    movdn.9 movdn.9
    swapw
    movdn.3 movdn.3
    push.0.0.0.0 loc_loadw.4
    drop drop
    movdn.3 movdn.3
    loc_storew.4 dropw

    push.0.0.0.0 loc_loadw.5
    movup.3 movup.3
    drop drop
    swapw
    movup.9 movup.9

    dup.1 movup.6 movup.8 swap.3
    exec.mulstep
    swap movdn.6

    dup.1 movup.5 movup.6 swap.3
    exec.mulstep
    swap
    swap drop movdn.3
    drop drop drop
    loc_storew.5 dropw

    # b[3]
    push.0.0.0.0 loc_loadw.4
    push.0.0.0.0 loc_loadw.5
    movup.7 movup.7 movup.7
    dropw
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.2
    push.0.0.0.0 loc_loadw.1
    movdn.3
    push.0 dropw

    exec.mulstep4

    movdn.9 movdn.9
    swapw
    movup.3
    push.0.0.0.0 loc_loadw.4
    drop
    movup.3
    loc_storew.4 dropw

    push.0.0.0.0 loc_loadw.5
    movdn.3
    push.0 dropw
    swapw
    movup.9 movup.9

    swap movup.5 movup.6 swap.3
    exec.mulstep

    drop
    movdn.3
    push.0 dropw

    # b[4]
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.2 # load the xs
    # OPTIM: don't need a[4-7], but can't use mulstep4 if we don't load

    push.0.0.0.0 loc_loadw.0
    push.0 dropw # b[4]

    exec.mulstep4
    dropw drop drop # OPTIM: don't need a[4-7], but can't use mulstep4 if we don't load

    # b[5]
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.0
    movup.2 movdn.3
    push.0 dropw
    movup.7
    dup.1 movup.6
    push.0
    exec.mulstep
    swap movdn.7
    movup.4
    dup.2 movup.7 swap.3
    exec.mulstep
    swap movdn.5
    swap movup.3 movup.4 swap.3
    exec.mulstep
    drop
    swap drop

    # b[6]
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.0
    swap movdn.3
    push.0 dropw
    movup.6
    dup.1 movup.6
    push.0
    exec.mulstep
    swap movdn.6
    swap movup.4 movup.5 swap.3
    exec.mulstep
    drop
    movdn.2
    drop drop

    # b[7]
    push.0.0.0.0 loc_loadw.3
    push.0.0.0.0 loc_loadw.0
    movdn.3
    push.0 dropw
    movup.4 movup.5
    movdn.2
    push.0
    exec.mulstep
    drop
    movdn.3
    drop drop drop

    push.0.0.0.0 loc_loadw.4
    swapw
end
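
# Usage sketch (illustrative, not part of this module): assuming this file is made
# available to the assembler as std::math::u256 (the module path is an assumption),
# a program could multiply two u256 values, each passed as eight 32-bit limbs, as follows:
#
#   use.std::math::u256
#
#   begin
#       # stack: [b7, ..., b0, a7, ..., a0, ...]
#       exec.u256::mul_unsafe
#       # stack: [c7, ..., c0, ...] where c = (a * b) % 2^256
#   end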