#! Given ω, accumulator ν and evaluation point τ on the top of the stack, this routine first computes #! ⍳ = ω / (τ - ω). It then loads four elements from memory address (q_ptr + 1) i.e. a0, a1, a2, a3 #! and finally accumulates it into ν, while consuming only when j is even | j ∈ [0..64) #! #! Where: #! loaded from (q_ptr + 1) s.t. a1, a0 are consumed into ν, and #! remaining part of word i.e. a3, a2 will be consumed during immediate next iteration #! of computing β which involves invocation of accumulate_for_odd_index. #! #! Input: [ω, ν1, ν0, τ1, τ0, q_ptr - 1, ...] #! Output: [a3, a2, ν1', ν0', τ1, τ0, q_ptr, ...] #! #! Cycles: 54 proc.accumulate_for_even_index # compute ⍳ = ω / (τ - ω) dup.4 dup.1 sub dup.4 ext2inv swap dup.2 mul movdn.2 mul # load from q_ptr + 1 # # notice, first increment the memory address and then load from it. movup.6 add.1 movdn.6 push.0.0.0 dup.9 push.0 swap mem_loadw # consume into ν' s.t. j is even && j ∈ [0..64) # # uses formula ν' = ν + ⍳ * a | a = (a0, a1), ⍳ = (⍳0, ⍳1), ν = (ν0, ν1) and ν' = (ν'0, ν'1) # also note, ν' becomes the new state of accumulator ν movup.5 movup.5 movup.5 movup.5 ext2mul swap movup.5 add swap movup.4 add movdn.3 movdn.3 # notice, were not consumed in this iteration, they will be consumed # into ν, in next iteration, when j is odd. end #! Given ω, accumulator ν and τ on stack top, this routine first computes ⍳ = ω / (τ - ω), and #! then accumulates ⍳ into ν, while consuming only when j is odd | j ∈ [0..64), following #! formula for computing β. #! #! Where: #! which was loaded (during the previous iteration i.e. when j was even) from q_ptr #! is consumed into ν. #! #! Input: [ω, a3, a2, ν1, ν0, τ1, τ0, q_ptr, ...] #! Output: [ν1', ν0', τ1, τ0, q_ptr, ...] #! #! Cycles: 30 proc.accumulate_for_odd_index # compute ⍳ = ω / (τ - ω) dup.6 dup.1 sub dup.6 ext2inv swap dup.2 mul movdn.2 mul # consume into ν' s.t. j is odd && j ∈ [0..64) # # uses formula ν' = ν + ⍳ * a | a = (a2, a3), ⍳ = (⍳0, ⍳1), ν = (ν0, ν1) and ν' = (ν'0, ν'1) # and ν' becomes the new value of accumulator ν ext2mul swap movup.3 add swap movup.2 add end #! Given τ and starting memory address to q (FRI codeword of size 64), this routine computes β. #! #! Input: [τ1, τ0, q_ptr, ...] #! Output: [β1, β0, τ1, τ0, q_ptr, ...] #! #! Cycles: 2802 proc.compute_beta_64 # decrement starting address by 1, because we first increment and then load onto the stack movup.2 sub.1 movdn.2 push.0.0 # accumulator for β # for j = 0 push.1 exec.accumulate_for_even_index # for j = 1 push.8 exec.accumulate_for_odd_index # for j = 2 push.64 exec.accumulate_for_even_index # for j = 3 push.512 exec.accumulate_for_odd_index # for j = 4 push.4096 exec.accumulate_for_even_index # for j = 5 push.32768 exec.accumulate_for_odd_index # for j = 6 push.262144 exec.accumulate_for_even_index # for j = 7 push.2097152 exec.accumulate_for_odd_index # for j = 8 push.16777216 exec.accumulate_for_even_index # for j = 9 push.134217728 exec.accumulate_for_odd_index # for j = 10 push.1073741824 exec.accumulate_for_even_index # for j = 11 push.8589934592 exec.accumulate_for_odd_index # for j = 12 push.68719476736 exec.accumulate_for_even_index # for j = 13 push.549755813888 exec.accumulate_for_odd_index # for j = 14 push.4398046511104 exec.accumulate_for_even_index # for j = 15 push.35184372088832 exec.accumulate_for_odd_index # for j = 16 push.281474976710656 exec.accumulate_for_even_index # for j = 17 push.2251799813685248 exec.accumulate_for_odd_index # for j = 18 push.18014398509481984 exec.accumulate_for_even_index # for j = 19 push.144115188075855872 exec.accumulate_for_odd_index # for j = 20 push.1152921504606846976 exec.accumulate_for_even_index # for j = 21 push.9223372036854775808 exec.accumulate_for_odd_index # for j = 22 push.17179869180 exec.accumulate_for_even_index # for j = 23 push.137438953440 exec.accumulate_for_odd_index # for j = 24 push.1099511627520 exec.accumulate_for_even_index # for j = 25 push.8796093020160 exec.accumulate_for_odd_index # for j = 26 push.70368744161280 exec.accumulate_for_even_index # for j = 27 push.562949953290240 exec.accumulate_for_odd_index # for j = 28 push.4503599626321920 exec.accumulate_for_even_index # for j = 29 push.36028797010575360 exec.accumulate_for_odd_index # for j = 30 push.288230376084602880 exec.accumulate_for_even_index # for j = 31 push.2305843008676823040 exec.accumulate_for_odd_index # for j = 32 push.18446744069414584320 exec.accumulate_for_even_index # for j = 33 push.18446744069414584313 exec.accumulate_for_odd_index # for j = 34 push.18446744069414584257 exec.accumulate_for_even_index # for j = 35 push.18446744069414583809 exec.accumulate_for_odd_index # for j = 36 push.18446744069414580225 exec.accumulate_for_even_index # for j = 37 push.18446744069414551553 exec.accumulate_for_odd_index # for j = 38 push.18446744069414322177 exec.accumulate_for_even_index # for j = 39 push.18446744069412487169 exec.accumulate_for_odd_index # for j = 40 push.18446744069397807105 exec.accumulate_for_even_index # for j = 41 push.18446744069280366593 exec.accumulate_for_odd_index # for j = 42 push.18446744068340842497 exec.accumulate_for_even_index # for j = 43 push.18446744060824649729 exec.accumulate_for_odd_index # for j = 44 push.18446744000695107585 exec.accumulate_for_even_index # for j = 45 push.18446743519658770433 exec.accumulate_for_odd_index # for j = 46 push.18446739671368073217 exec.accumulate_for_even_index # for j = 47 push.18446708885042495489 exec.accumulate_for_odd_index # for j = 48 push.18446462594437873665 exec.accumulate_for_even_index # for j = 49 push.18444492269600899073 exec.accumulate_for_odd_index # for j = 50 push.18428729670905102337 exec.accumulate_for_even_index # for j = 51 push.18302628881338728449 exec.accumulate_for_odd_index # for j = 52 push.17293822564807737345 exec.accumulate_for_even_index # for j = 53 push.9223372032559808513 exec.accumulate_for_odd_index # for j = 54 push.18446744052234715141 exec.accumulate_for_even_index # for j = 55 push.18446743931975630881 exec.accumulate_for_odd_index # for j = 56 push.18446742969902956801 exec.accumulate_for_even_index # for j = 57 push.18446735273321564161 exec.accumulate_for_odd_index # for j = 58 push.18446673700670423041 exec.accumulate_for_even_index # for j = 59 push.18446181119461294081 exec.accumulate_for_odd_index # for j = 60 push.18442240469788262401 exec.accumulate_for_even_index # for j = 61 push.18410715272404008961 exec.accumulate_for_odd_index # for j = 62 push.18158513693329981441 exec.accumulate_for_even_index # for j = 63 push.16140901060737761281 exec.accumulate_for_odd_index # compute (τ^64 - 1) / 64 dup.3 dup.3 repeat.6 dup.1 dup.1 ext2mul end swap sub.1 swap push.18158513693329981441.0 ext2mul ext2mul end #! Given τ and starting memory address to q (FRI codeword of size 32), this routine computes β. #! #! Input: [τ1, τ0, q_ptr, ...] #! Output: [β1, β0, τ1, τ0, q_ptr, ...] #! #! Cycles: 1421 proc.compute_beta_32 # decrement starting address by 1, because we first increment and then load onto the stack movup.2 sub.1 movdn.2 push.0.0 # accumulator for β # for j = 0 push.1 exec.accumulate_for_even_index # for j = 1 push.64 exec.accumulate_for_odd_index # for j = 2 push.4096 exec.accumulate_for_even_index # for j = 3 push.262144 exec.accumulate_for_odd_index # for j = 4 push.16777216 exec.accumulate_for_even_index # for j = 5 push.1073741824 exec.accumulate_for_odd_index # for j = 6 push.68719476736 exec.accumulate_for_even_index # for j = 7 push.4398046511104 exec.accumulate_for_odd_index # for j = 8 push.281474976710656 exec.accumulate_for_even_index # for j = 9 push.18014398509481984 exec.accumulate_for_odd_index # for j = 10 push.1152921504606846976 exec.accumulate_for_even_index # for j = 11 push.17179869180 exec.accumulate_for_odd_index # for j = 12 push.1099511627520 exec.accumulate_for_even_index # for j = 13 push.70368744161280 exec.accumulate_for_odd_index # for j = 14 push.4503599626321920 exec.accumulate_for_even_index # for j = 15 push.288230376084602880 exec.accumulate_for_odd_index # for j = 16 push.18446744069414584320 exec.accumulate_for_even_index # for j = 17 push.18446744069414584257 exec.accumulate_for_odd_index # for j = 18 push.18446744069414580225 exec.accumulate_for_even_index # for j = 19 push.18446744069414322177 exec.accumulate_for_odd_index # for j = 20 push.18446744069397807105 exec.accumulate_for_even_index # for j = 21 push.18446744068340842497 exec.accumulate_for_odd_index # for j = 22 push.18446744000695107585 exec.accumulate_for_even_index # for j = 23 push.18446739671368073217 exec.accumulate_for_odd_index # for j = 24 push.18446462594437873665 exec.accumulate_for_even_index # for j = 25 push.18428729670905102337 exec.accumulate_for_odd_index # for j = 26 push.17293822564807737345 exec.accumulate_for_even_index # for j = 27 push.18446744052234715141 exec.accumulate_for_odd_index # for j = 28 push.18446742969902956801 exec.accumulate_for_even_index # for j = 29 push.18446673700670423041 exec.accumulate_for_odd_index # for j = 30 push.18442240469788262401 exec.accumulate_for_even_index # for j = 31 push.18158513693329981441 exec.accumulate_for_odd_index # compute (τ^32 - 1) / 32 dup.3 dup.3 repeat.5 dup.1 dup.1 ext2mul end swap sub.1 swap push.17870283317245378561.0 ext2mul ext2mul end #! Given τ and end memory address of remainder polynomial p (with degree at max 7, over #! quadratic extension field of Fq) on the top of the stack, this routine computes α. #! #! Note, p_ptr is absolute memory address of polynomial p s.t. next 3 decreasing (by 1) #! memory addresses hold remaining 6 coefficients of p. #! #! Input: [τ1, τ0, p_ptr, ...] #! Output: [α1, α0, ...] #! #! Cycles: 114 proc.compute_alpha_64 padw dup.6 sub.1 swap.7 mem_loadw #=> [a11, a10, a01, a00, τ1, τ0, p_ptr-1, ...] dup.5 dup.5 ext2mul ext2add #=> [acc1, acc0, τ1, τ0, p_ptr-1, ...] movup.4 dup sub.1 movdn.5 padw movup.4 mem_loadw #=> [a11, a10, a01, a00, acc1, acc0, τ1, τ0, p_ptr-1, ...] movup.5 movup.5 dup.7 dup.7 #=> [τ1, τ0, acc1, acc0, a11, a10, a01, a00, τ1, τ0, p_ptr-1, ...] ext2mul ext2add #=> [acc1, acc0, a01, a00, τ1, τ0, p_ptr-1, ...] dup.5 dup.5 ext2mul ext2add #=> [acc1, acc0, τ1, τ0, p_ptr-1, ...] movup.4 dup sub.1 movdn.5 padw movup.4 mem_loadw #=> [a11, a10, a01, a00, acc1, acc0, τ1, τ0, p_ptr-1, ...] movup.5 movup.5 dup.7 dup.7 #=> [τ1, τ0, acc1, acc0, a11, a10, a01, a00, τ1, τ0, p_ptr-1, ...] ext2mul ext2add #=> [acc1, acc0, a01, a00, τ1, τ0, p_ptr-1, ...] dup.5 dup.5 ext2mul ext2add #=> [acc1, acc0, τ1, τ0, p_ptr-1, ...] padw movup.8 mem_loadw #=> [a11, a10, a01, a00, acc1, acc0, τ1, τ0, ...] movup.5 movup.5 dup.7 dup.7 #=> [τ1, τ0, acc1, acc0, a11, a10, a01, a00, τ1, τ0, ...] ext2mul ext2add #=> [acc1, acc0, a01, a00, τ1, τ0, ...] movup.5 movup.5 ext2mul ext2add #=> [acc1, acc0, τ1, τ0, ...] end #! Given τ and end memory address of remainder polynomial p (with degree at most 3, over #! quadratic extension field of Fq) on the top of the stack, this routine computes α. #! #! Note, p_ptr is absolute memory address of polynomial p s.t. this and next (decreased by 1) #! memory address holds total 4 coefficients of p. #! #! Input: [τ1, τ0, p_ptr, ...] #! Output: [α1, α0, τ1, τ0, p_ptr, ...] #! #! Cycles: 47 proc.compute_alpha_32 padw dup.6 sub.1 swap.7 mem_loadw dup.5 dup.5 ext2mul ext2add padw movup.8 mem_loadw movup.5 movup.5 dup.7 dup.7 ext2mul ext2add movup.5 movup.5 ext2mul ext2add end #! Given memory address of the remainder codeword with 64 evaluations, this routine checks #! probabilistically that this codeword is the evaluation of a degree 7 polynomial. #! #! A few assumptions about q_ptr: #! - q_ptr is an absolute memory address of the beginning of remainder codeword. #! - Each evaluation is 2 elements wide because they belong to quadratic extension field (meaning #! each memory address will hold two consecutive evaluations) #! - Words (four field elements), in memory, are laid out in this order (a0_0, a0_1, a1_0, a1_1). #! This means that (a0_1, a0_0) -> first evaluation and (a1_1, a1_0) -> next evaluation #! - Next 31 memory addresses should be holding remaining 62 evaluations. That is, if q_ptr holds #! (a0_0, a0_1, a1_0, a1_1), then q_ptr + 1, must hold (a2_0, a2_1, a3_0, a3_1), and q_ptr + 31 #! should be holding (a62_0, a62_1, a63_0, a63_1). #! - The polynomial is laid out starting from memory address q_ptr + 32 and occupies 4 contiguous #! memory addresses. #! If remainder verification fails, execution of the program stops. #! #! Input: [τ1, τ0, q_ptr, ...] #! Output: [...] #! #! Cycles: 2931 export.verify_remainder_64 exec.compute_beta_64 #=> [β1, β0, τ1, τ0, q_ptr, ...] # Pointer to the last word of the remainder polynomial for Horner evaluation. movup.4 add.4 #=> [p_ptr, β1, β0, τ1, τ0, ...] # We need to multiply τ by the domain offset before evaluation. movup.4 mul.7 movup.4 mul.7 exec.compute_alpha_64 # assert α == β # # [α1, α0, β1, β0, ...] movup.2 eq movdn.2 eq and assert # [...] end #! Given memory address of the remainder codeword with 32 evaluations, this routine checks #! probabilistically that the codeword is the evaluation of a degree 3 polynomial. #! #! A few assumptions about q_ptr: #! - q_ptr is an absolute memory address of the beginning of remainder codeword. #! - Each evaluation is 2 elements wide because they belong to quadratic extension field (meaning #! each memory address will hold two consecutive evaluations) #! - Words (four field elements), in memory, are laid out in this order (a0_0, a0_1, a1_0, a1_1). #! This means that (a0_1, a0_0) -> first evaluation and (a1_1, a1_0) -> next evaluation #! - Next 15 memory addresses should be holding remaining 30 evaluations. That is, if q_ptr holds #! (a0_0, a0_1, a1_0, a1_1), then q_ptr + 1, must hold (a2_0, a2_1, a3_0, a3_1), and q_ptr + 15 #! should be holding (a30_0, a30_1, a31_0, a31_1). #! - The polynomial is laid out starting from memory address q_ptr + 16 and occupies 4 contiguous #! memory addresses. #! #! If remainder verification fails, execution of the program stops. #! #! Input: [τ1, τ0, q_ptr, ...] #! Output: [...] #! #! Cycles: 1483 export.verify_remainder_32 exec.compute_beta_32 #=> [β1, β0, τ1, τ0, q_ptr, ...] # Pointer to the last word of the remainder polynomial for Horner evaluation. movup.4 add.2 #=> [p_ptr, β1, β0, τ1, τ0, ...] # We need to multiply τ by the domain offset before evaluation. movup.4 mul.7 movup.4 mul.7 exec.compute_alpha_32 # assert α == β # # [α1, α0, β1, β0, ...] movup.2 eq movdn.2 eq and assert # [...] end