use.std::crypto::stark::constants use.std::crypto::stark::utils use.std::crypto::hashes::rpo #! Helper procedure to compute addition of two words component-wise. #! Input: [b3, b2, b1, b0, a3, a2, a1, a0] #! Output: [c3, c2, c1, c0] #! #! Cycles: 16 proc.add_two_words movup.3 movup.7 add #=> [c0, b3, b2, b1, a3, a2, a1] movup.3 movup.6 add #=> [c1, c0, b3, b2, a3, a2] movup.3 movup.5 add #=> [c2, c1, c0, b3, a3] movup.3 movup.4 add #=> [c3, c2, c1, c0] end #! Return the first half of the rate portion of the random coin state #! #! The random coin uses RPO to generate data. The RPO state is composed of 3 #! words, 2 words for the rate, and 1 word for the capacity. This procedure #! returns the first word of the RPO state. #! #! Input: [...] #! Output: [R1, ...] #! Cycles: 6 export.get_rate_1 padw exec.constants::r1_ptr mem_loadw end #! Return the second half of the rate portion of the random coin state #! #! The random coin uses RPO to generate data. The RPO state is composed of 3 #! words, 2 words for the rate, and 1 word for the capacity. This procedure #! returns the first word of the RPO state. #! #! Input: [...] #! Output: [R2, ...] #! Cycles: 6 export.get_rate_2 padw exec.constants::r2_ptr mem_loadw end #! Return the capacity portion of the random coin state #! #! The random coin uses RPO to generate data. The RPO state is composed of 3 #! words, 2 words for the rate, and 1 word for the capacity. This procedure #! returns the first word of the RPO state. #! #! Input: [...] #! Output: [C, ...] #! Cycles: 6 export.get_capacity padw exec.constants::c_ptr mem_loadw end #! Initializes the seed for randomness generation by computing the hash of the proof context using #! the trace length, number of queries, logarithm of blowup factor and the number of bits of #! grinding. Currently, this part, as well as the rest of the STARK verifier assumes a blowup factor #! equal to 8. #! The ouput of this procedure is the capacity portion of the state after applying `hperm`. #! #! Input: [log(trace_length), num_queries, blowup, grinding, ...] #! Output: [C] #! Cycles: 175 export.init_seed # Save the parameters in memory for later use dup exec.constants::trace_length_log_ptr mem_store dup.1 exec.constants::number_queries_ptr mem_store dup.3 exec.constants::grinding_factor_ptr mem_store # Pre-load constants used by hperm into memory and initialize the state of the random coin to zeros. # Since memory beyond 3 * 2^30 does not have any special meaning, we can use the memory region # starting from address 2^32 - 1 in decreasing order to hold constants that are used throughout # the `verify` procedure. # # Cycles: 22 padw exec.constants::zero_word mem_storew exec.constants::c_ptr mem_storew exec.constants::r1_ptr mem_storew exec.constants::r2_ptr mem_storew drop push.1 swap.3 exec.constants::zero_zero_zero_one_word mem_storew dropw #=> [log(trace_length), num_queries, log(blowup), grinding] # Create the initial seed for randomness generation from proof context ## Compute trace_length ## Cycles: 20 dup pow2 u32split assertz #=> [trace_length, log(trace_length), num_queries, log(blowup), grinding] ## Save the trace length and its log to memory dup.0 exec.constants::trace_length_ptr mem_store ## Assert blowup is equal to 8 ## Cycles: 6 swap dup.3 dup push.3 assert_eq ## Compute log(lde_size) and lde_size and store them add swap movup.3 pow2 dup movdn.4 swap dup movdn.3 mul # Compute lde_domain generator dup.1 exec.utils::compute_lde_generator movdn.2 #=> [lde_size, log(lde_size), lde_g, trace_length, num_queries, blowup, grinding] push.0 movdn.3 #=> [lde_size, log(lde_size), lde_g, 0, trace_length, num_queries, blowup, grinding] # Save `[lde_size, log(lde_size), lde_g, 0]` exec.constants::lde_size_ptr mem_storew #=> [lde_size, log(lde_size), lde_g, 0, trace_length, num_queries, blowup, grinding] # clean stack drop drop #=> [lde_g, 0, trace_length, num_queries, blowup, grinding] # Compute trace generator `trace_g` = `lde_g^blowup_factor` repeat.3 dup mul end #=> [trace_g, 0, trace_length, num_queries, blowup, grinding] # Save `trace_g` to memory exec.constants::trace_domain_generator_ptr mem_store #=> [0, trace_length, num_queries, blowup, grinding] # clean satck drop #=> [trace_length, num_queries, blowup, grinding] # Construct the proof context ##trace layout info push.1208027408 ##field modulus bytes (2 field elements) push.1 push.4294967295 ## field extension and FRI parameters push.132103 # Hash proof context # Cycles: 15 swapw push.1.0.0.0 #=> [0, 0, 0, 1, B, A, ..] swapw.2 swapw #=> [B, A, 0, 0, 0, 1, ..] hperm dropw dropw #=> [C] end #! Reseed the random coin with `DATA` #! #! Input: [DATA, ...] #! Ouput: [...] #! Cycles: 54 export.reseed # Load previous state and update it # -------------------------------------------------------------------------------------------- exec.get_rate_1 # => [R1, DATA, ...] (6 cycles) exec.add_two_words # => [R1, ...] (16 cycles) exec.get_capacity swapw exec.get_rate_2 # => [R2, R1, C, ...] (13 cycles) hperm # => [R2', R1', C', ...] (1 cycles) # Save the new state to memory # -------------------------------------------------------------------------------------------- exec.constants::r2_ptr mem_storew dropw exec.constants::r1_ptr mem_storew dropw exec.constants::c_ptr mem_storew dropw # => [...] (18 cycles) end # COEFFICIENT GENERATION # ============================================================================================= #! Generates a `num_tuples` tuples of random field elements and stores them in memory #! starting from address `dest_ptr`. Each memory address holds two tuples. #! TODO: Generalize by keeping track of something similar to the `output` variable in `RpoRandomCoin` #! so that we keep track of already used randomness and know when there is a need to apply `hperm`. #! #! Input: [dest_ptr, num_tuples, ...] #! Output: [...] #! #! Cycles: 69 + (22 * num_tuples) / 4 proc.generate_random_coefficients # Compute the loop counter. We use checked division to make sure the number is a multiple of 4. # If we use field division and num_tuples is not a multiple of 4 then we will enter into # a very large loop with high probability. push.0 dup movup.2 movup.3 u32assert u32divmod.4 assertz neg #=> [loop_ctr, dest_ptr, x, x, ...] # where loop_ctr = - num_tuples / 4; we negate the counter so that we can count up to 0 exec.get_rate_1 dup.5 mem_storew exec.get_rate_2 dup.9 add.1 mem_storew #=> [R2, R1, loop_ctr, dest_ptr, 0, 0, ..] exec.get_capacity swapdw swapw swap add.2 swap #=> [loop_ctr, dest_ptr, 0, 0, R1, C, R2, ..] add.1 dup neq.0 while.true swapw.3 hperm #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...] # save R2 to mem[dest+1]; we use dup.13 here because it takes only 1 cycle dup.13 add.1 mem_storew #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...] # save R1 to mem[dest] swapw dup.13 mem_storew swapw #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...] # update destination pointer and loop counter swapw.3 #=> [loop_ctr, dest_ptr, x, x, R1, C, R2, ...] swap add.2 swap #=> [loop_ctr, dest_ptr+2, x, x, R1, C, R2, ...] add.1 dup #=> [loop_ctr+1, loop_ctr+1, dest_ptr+2, x, x, R1, C, R2, ...] neq.0 end # Save the new state of the random coin dropw exec.constants::r1_ptr mem_storew dropw exec.constants::c_ptr mem_storew dropw exec.constants::r2_ptr mem_storew dropw #=> [...] end #! Generates a `num_tuples` tuples of random field elements and stores them in memory #! starting from address `dest_ptr`. Each memory address holds one tuple. #! TODO: Generalize by keeping track of something similar to the `output` variable in `RpoRandomCoin` #! so that we keep track of already used randomness and know when there is a need to apply `hperm`. #! #! Input: [dest_ptr, num_tuples, ...] #! Output: [...] #! #! Cycles: 104 + (69 * num_tuples) / 4 proc.generate_random_coefficients_pad # Compute the loop counter. We use checked division to make sure the number is a multiple of 4. # If we use field division and num_tuples is not a multiple of 4 then we will enter into # a very large loop with high probability. push.0 dup movup.2 movup.3 u32assert u32divmod.4 assertz neg #=> [loop_ctr, dest_ptr, x, x, ...] # where loop_ctr = - num_tuples / 4; we negate the counter so that we can count up to 0 exec.get_rate_1 dup.5 #=> [dest_ptr, a11, a10, a01, a00, loop_ctr, dest_ptr, x, x, ...] dup.4 dup.4 push.0.0 dup.4 mem_storew #=> [0, 0, a01, a00, dest_ptr, a11, a10, a01, a00, loop_ctr, dest_ptr, x, x, ...] dropw dup.2 dup.2 push.0.0 movup.4 add.1 mem_storew #=> [0, 0, a11, a10, a11, a10, a01, a00, loop_ctr, dest_ptr, x, x, ...] exec.constants::r2_ptr mem_loadw dup.9 add.4 swap.10 #=> [dest_ptr, a31, a30, a21, a20, a11, a10, a01, a00, loop_ctr, dest_ptr+4, x, x, ...] dup.4 dup.4 push.0.0 dup.4 add.2 mem_storew #=> [0, 0, a21, a20, dest_ptr, a31, a30, a21, a20, a11, a10, a01, a00, loop_ctr, dest_ptr+4, x, x, ...] dropw dup.2 dup.2 push.0.0 movup.4 add.3 mem_storew #=> [0, 0, a31, a30, a31, a30, a21, a20, a11, a10, a01, a00, loop_ctr, dest_ptr+4, x, x, ...] exec.constants::c_ptr mem_loadw swapdw swapw #=> [loop_ctr, dest_ptr, 0, 0, R1, C, R2, ..] add.1 dup neq.0 while.true swapw.3 hperm #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...] # save R2 to mem[dest+1]; we use dup.13 here because it takes only 1 cycle dup.13 dup.4 dup.4 push.0.0 dup.4 add.2 mem_storew #=> [0, 0, a21, a20, dest_ptr, a31, a30, a21, a20, a11, a10, a01, a00, C, loop_ctr, dest_ptr, x, x, ...] dropw dup.2 dup.2 push.0.0 movup.4 add.3 mem_storew #=> [0, 0, a31, a30, a31, a30, a21, a20, a11, a10, a01, a00, C, loop_ctr, dest_ptr, x, x, ...] # save R1 to mem[dest] dropw swapw dup.13 dup.4 dup.4 push.0.0 dup.4 mem_storew #=> [0, 0, a01, a00, dest_ptr, a11, a10, a01, a00, a31, a30, a21, a20, C, loop_ctr, dest_ptr, x, x, ...] dropw dup.2 dup.2 push.0.0 movup.4 add.1 mem_storew #=> [0, 0, a01, a00, a11, a10, a01, a00, a31, a30, a21, a20, C, loop_ctr, dest_ptr, x, x, ...] # reshuffle and update destination pointer and loop counter dropw swapw swapw.3 #=> [loop_ctr, dest_ptr, x, x, R1, C, R2, ...] swap add.4 swap #=> [loop_ctr, dest_ptr+2, x, x, R1, C, R2, ...] add.1 dup #=> [loop_ctr+1, loop_ctr+1, dest_ptr+2, x, x, R1, C, R2, ...] neq.0 end # Save the new state of the random coin dropw exec.constants::r1_ptr mem_storew dropw exec.constants::c_ptr mem_storew dropw exec.constants::r2_ptr mem_storew dropw #=> [...] end #! Draw a list of random extension field elements related to the auxiliary trace and store the list #! in memory from `aux_rand_elem_ptr` to `aux_rand_elem_ptr + 8 - 1` #! #! Input: [aux_rand_elem_ptr, ...] #! Output: [...] #! Cycles: 150 export.generate_aux_randomness push.16 swap exec.generate_random_coefficients #=> [...] end #! Draw constraint composition random coefficients and save them into memory in the region from #! `compos_coef_ptr` `compos_coef_ptr + 118 - 1` as `(r1_1, r1_0, r0_1, r0_0)` #! #! Input: [compos_coef_ptr, ...] #! Output: [...] #! Cycles: 1309 export.generate_constraint_composition_coefficients push.236 swap exec.generate_random_coefficients #=> [...] end #! Draw deep composition polynomial random coefficients and save them into memory in the region from #! `deep_rand_coef_ptr` to `deep_rand_coef_ptr + 89 - 1` as `(0, 0, r0_1, r0_0)` #! The number of coefficients is equal to: #! 1. (72 + 9) * 2 Felt for the main and auxiliary traces. #! 2. 8 * 2 Felt for constraint polynomial. #! Total: 89 tuples of type (Felt, Felt) #! #! Input: [deep_rand_coef_ptr, ...] #! Output: [...] #! Cycles: 1693 export.generate_deep_composition_random_coefficients push.92 swap exec.generate_random_coefficients_pad #=> [...] end # OOD POINT GENERATION # ============================================================================================= #! Generate the OOD challenge point `z = (z0, z1)` and compute `z^N` where N is #! the trace length. The resulting word `[(z_1, z_0)^N, z1, z0]` is stored in the #! global memory address `exec.z_ptr` reservedfor it. #! #! Input: [X, ...] #! Output: [...] #! Note: The top word on the stack is consumed by this procedure. #! Cycles: 21 + 10 * log(N) export.generate_z_zN # Load z (first two felts of the random coin state) and log trace length N exec.constants::r1_ptr mem_loadw drop drop exec.constants::trace_length_log_ptr mem_load # => [log(trace_len), z_1, z_0, ...] dup.2 dup.2 # => [z_1, z_0, log(trace_len), z_1, z_0, ...] # Compute z^N using the fact that z^N = z^(2^log(N)) # Loop starts with `i=log(trace_len)` push.1 while.true dup.1 dup.1 ext2mul # => [(z_1, z_0)^n, i, z_1, z_0, ...] dup.2 sub.1 swap.3 push.1 neq # => [b, (z_1, z_0)^n, i-1, z_1, z_0, ...] end movup.2 drop # => [(z_1, z_0)^n, z_1, z_0, ...] # Store z and z^N exec.constants::z_ptr mem_storew dropw end # INDEX GENERATION # ============================================================================================= # Helper function for generating a list of indices that takes a word of random felts and saves # to memory region referenced by `ptr` 4 random integers in the range 0..=(mask+1). # `depth` is saved next to each of the 4 integers for use in subsequent steps. # # Input: [R, ptr, mask, depth, ...] # Output:[...] # # Cycles: 100 proc.generate_four_integers # Get the first random felt dup.3 # [r0, R1, ptr, mask, depth, ...] u32split swap # [r0_lo, r0_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r0_lo, r0_hi, R1, ptr, mask, depth, ...] u32and # [r, r0_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r0_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r0_hi, 0, R1, ptr, mask, depth, ...] # Store and update pointer dup.8 add.1 swap.9 # [ptr, r, depth, r0_hi, 0, R1, ptr + 1, mask, depth, ...] mem_storew dropw # [R1, ptr + 1, mask, depth, ...] # Get the second random felt dup.2 # [r1, R1, ptr, mask, depth, ...] u32split swap # [r1_lo, r1_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r1_lo, r1_hi, R1, ptr, mask, depth, ...] u32and # [r, r1_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r1_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r1_hi, 0, R1, ptr, mask, depth, ...] # Store and update pointer dup.8 add.1 swap.9 # [ptr, r, depth, r1_hi, 0, R1, ptr + 1, mask, depth, ...] mem_storew dropw # [R1, ptr + 1, mask, depth, ...] # Get the third random felt dup.1 u32split swap dup.7 u32and dup.8 swap push.0 movdn.3 # Store and update pointer dup.8 add.1 swap.9 mem_storew dropw # Get the fourth random felt dup u32split swap dup.7 u32and dup.8 swap push.0 movdn.3 # Store and update pointer dup.8 add.1 swap.9 mem_storew dropw end # Helper function for generating a list of indices. It takes a word of random felts and saves # to a memory region, referenced by `ptr`, 3 random integers in the range 0..=(mask+1). This procedure # is used to generate a list of random indices that are used in FRI. Moreover, this procedure # is called first, and right after the PoW check, thus the first element in the rate portion of # the state is discarded. # `depth` is saved next to each of the 3 integers for use in subsequent steps. # # Input: [R, ptr, mask, depth, ...] # Output:[R, ptr + 3, mask, depth, ...] # # Cycles: 75 proc.generate_three_integers # Get the second random felt dup.2 # [r0, R1, ptr, mask, depth, ...] u32split swap # [r0_lo, r0_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r0_lo, r0_hi, R1, ptr, mask, depth, ...] u32and # [r, r0_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r0_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r0_hi, 0, R1, ptr, mask, depth, ...] # Store and update pointer dup.8 add.1 swap.9 # [ptr, r, depth, r0_hi, 0, R1, ptr + 1, mask, depth, ...] mem_storew dropw # [R1, ptr + 1, mask, depth, ...] # Get the second random felt dup.1 # [r1, R1, ptr, mask, depth, ...] u32split swap # [r1_lo, r1_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r1_lo, r1_hi, R1, ptr, mask, depth, ...] u32and # [r, r1_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r1_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r1_hi, 0, R1, ptr, mask, depth, ...] # Store and update pointer dup.8 add.1 swap.9 # [ptr, r, depth, r1_hi, 0, R1, ptr + 1, mask, depth, ...] mem_storew dropw # [R1, ptr + 1, mask, depth, ...] # Get the third random felt dup.0 u32split swap dup.7 u32and dup.8 swap push.0 movdn.3 # Store and update pointer dup.8 add.1 swap.9 mem_storew dropw end #! Generate a list of `num_queries` number of random indices in the range #! [0, lde_size] and store it in memory starting from `query_ptr`. #! The list is stored as `(r, depth, y, y)` where `depth` is `log(lde_domain_size)`. #!`depth` is needed when computing the deep queries. #! TODO: the case of duplicate queries #! #! Input: [query_ptr, num_queries, ...] #! Output: [...] #! #! Cycles: 267 + q * 236 + r * 29 where q = num_queries / 8 and r = num_queries % 8 #! #! NOTE: This procedure is called first, and right after the PoW check, thus the first element #! in the rate portion of the state is discarded. #! NOTE: The cycles count can be estimated, using the fact that r < 8, via the more compact formula #! 470 + 236 * (num_queries / 8) export.generate_list_indices # Create mask padw exec.constants::lde_size_ptr mem_loadw movup.2 drop movup.2 drop sub.1 #=> [mask, depth, query_ptr, num_queries] where depth = log(lde_size) # Get address holding the integers (this will later hold the FRI queries) movup.2 #=> [query_ptr, mask, depth, num_queries] # Load the first half of the rate portion of the state of the random coin. We discard the first # element as it is used for PoW and use the remaining the 3. exec.get_rate_1 exec.generate_three_integers # Load the second half of the rate portion of the state of the random coin. exec.constants::r2_ptr mem_loadw exec.generate_four_integers #=> [R2, query_ptr, mask, depth, num_queries, ...] # Squeeze exec.constants::c_ptr mem_loadw exec.get_rate_1 exec.get_rate_2 hperm exec.rpo::squeeze_digest #=> [R1, query_ptr, mask, depth, num_queries, ...] # Use `num_queries` to iterate. ## Subtract the 7 elements we have already generated above. movup.7 push.7 sub ## Divide by 8 to get the number of iterations u32assert u32divmod.8 #=> [remainder, quotient, X, query_ptr, mask, depth, ...] ## Save remainder for later use movdn.8 ## Use `quotient` to iterate dup movdn.8 push.0 neq while.true exec.generate_four_integers exec.constants::r2_ptr mem_loadw exec.generate_four_integers #=> [R2, query_ptr, mask, depth, num_queries, ...] # Squeeze exec.constants::c_ptr mem_loadw exec.get_rate_1 exec.get_rate_2 hperm exec.rpo::squeeze_digest #=> [R1, query_ptr, mask, depth, num_queries, ...] movup.7 sub.1 dup movdn.8 push.0 neq end ## Use remainder ### Put the remaining number of queries to generate in the appropriate stack position movup.8 movdn.7 ### Load the second half of the rate portion of the state of the random coin. padw exec.constants::r2_ptr mem_loadw #=> [R2, R1, query_ptr, mask, depth, num_queries, ...] ### Iterate over remainder dup.11 sub.1 swap.12 neq.0 while.true movup.7 u32split swap # [r0_lo, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] dup.10 # [mask, r0_lo, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] u32and # [r, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] dup.11 swap # [r, depth, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r0_hi, 0, R2, r3, r2, r1, ptr, mask, depth, ...] # Store and update pointer dup.11 add.1 swap.12 # [ptr, r, depth, r0_hi, 0, R2, r3, r2, r1, ptr + 1, mask, depth, ...] mem_storew drop drop drop # [x, R2, r3, r2, r1, ptr + 1, mask, depth, ...] dup.11 sub.1 swap.12 push.0 neq end dropw dropw dropw drop end # PROOF-OF-WORK CHECK # ============================================================================================= #! Check that the Proof-of-Work contained in the nonce is equal to the required number #! of bits prescribed by grinding bits. The grinding factor is assumed to be less than 32. #! #! Input: [grinding_factor, ...] #! Output: [...] #! Cycles: 73 export.check_pow # Compute the mask. pow2 u32assert u32overflowing_sub.1 assertz #=> [mask, ...] # Load Capacity portion exec.get_capacity # Load first half of rate portion and add pow witness to first element of rate exec.get_rate_1 adv_push.1 dup.4 add swap.4 drop # Load the second half of rate portion and apply the permutation padw exec.constants::r2_ptr mem_loadw hperm #=> [R2, R1, C, mask, ...] # Save the new random coin state exec.constants::r2_ptr mem_storew dropw exec.constants::r1_ptr mem_storew swapw exec.constants::c_ptr mem_storew dropw drop drop drop #=> [R10, mask] # Make sure the PoW is valid u32split drop u32and assertz drop #=> [...] end