use.std::crypto::stark::constants
use.std::crypto::stark::utils
use.std::crypto::hashes::rpo

#! Helper procedure to compute addition of two words component-wise.
#! Input: [b3, b2, b1, b0, a3, a2, a1, a0]
#! Output: [c3, c2, c1, c0]
#!
#! Cycles: 16
proc.add_two_words
    movup.3
    movup.7
    add
    #=> [c0, b3, b2, b1, a3, a2, a1]

    movup.3
    movup.6
    add
    #=> [c1, c0, b3, b2, a3, a2]

    movup.3
    movup.5
    add
    #=> [c2, c1, c0, b3, a3]

    movup.3
    movup.4
    add
    #=> [c3, c2, c1, c0]
end

#! Return the first half of the rate portion of the random coin state
#!
#! The random coin uses RPO to generate data. The RPO state is composed of 3
#! words, 2 words for the rate, and 1 word for the capacity. This procedure
#! returns the first word of the RPO state.
#!
#! Input: [...]
#! Output: [R1, ...]
#! Cycles: 6
export.get_rate_1
    padw exec.constants::r1_ptr mem_loadw
end

#! Return the second half of the rate portion of the random coin state
#!
#! The random coin uses RPO to generate data. The RPO state is composed of 3
#! words, 2 words for the rate, and 1 word for the capacity. This procedure
#! returns the first word of the RPO state.
#!
#! Input: [...]
#! Output: [R2, ...]
#! Cycles: 6
export.get_rate_2
    padw exec.constants::r2_ptr mem_loadw
end

#! Return the capacity portion of the random coin state
#!
#! The random coin uses RPO to generate data. The RPO state is composed of 3
#! words, 2 words for the rate, and 1 word for the capacity. This procedure
#! returns the first word of the RPO state.
#!
#! Input: [...]
#! Output: [C, ...]
#! Cycles: 6
export.get_capacity
    padw exec.constants::c_ptr mem_loadw
end

#! Initializes the seed for randomness generation by computing the hash of the proof context using
#! the trace length, number of queries, logarithm of blowup factor and the number of bits of
#! grinding. Currently, this part, as well as the rest of the STARK verifier assumes a blowup factor
#! equal to 8.
#! The ouput of this procedure is the capacity portion of the state after applying `hperm`.
#!
#! Input: [log(trace_length), num_queries, blowup, grinding, ...]
#! Output: [C]
#! Cycles: 175
export.init_seed

    # Save the parameters in memory for later use
    dup exec.constants::trace_length_log_ptr mem_store
    dup.1 exec.constants::number_queries_ptr mem_store
    dup.3 exec.constants::grinding_factor_ptr mem_store

    # Pre-load constants used by hperm into memory and initialize the state of the random coin to zeros.
    # Since memory beyond 3 * 2^30 does not have any special meaning, we can use the memory region
    # starting from address 2^32 - 1 in decreasing order to hold constants that are used throughout
    # the `verify` procedure.
    #
    # Cycles: 22
    padw
    exec.constants::zero_word mem_storew
    exec.constants::c_ptr mem_storew
    exec.constants::r1_ptr mem_storew
    exec.constants::r2_ptr mem_storew

    drop
    push.1
    swap.3
    exec.constants::zero_zero_zero_one_word mem_storew
    dropw
    #=> [log(trace_length), num_queries, log(blowup), grinding]


    # Create the initial seed for randomness generation from proof context

    ## Compute trace_length
    ## Cycles: 20
    dup
    pow2
    u32split assertz
    #=> [trace_length, log(trace_length), num_queries, log(blowup), grinding]

    ## Save the trace length and its log to memory
    dup.0 exec.constants::trace_length_ptr mem_store

    ## Assert blowup is equal to 8
    ##  Cycles: 6
    swap
    dup.3
    dup
    push.3
    assert_eq

    ## Compute log(lde_size) and lde_size and store them
    add
    swap
    movup.3
    pow2
    dup
    movdn.4
    swap
    dup
    movdn.3
    mul

    # Compute lde_domain generator
    dup.1
    exec.utils::compute_lde_generator
    movdn.2
    #=> [lde_size, log(lde_size), lde_g, trace_length, num_queries, blowup, grinding]

    push.0
    movdn.3
    #=> [lde_size, log(lde_size), lde_g, 0, trace_length, num_queries, blowup, grinding]

    # Save `[lde_size, log(lde_size), lde_g, 0]`
    exec.constants::lde_size_ptr mem_storew
    #=> [lde_size, log(lde_size), lde_g, 0, trace_length, num_queries, blowup, grinding]

    # clean stack
    drop drop
    #=> [lde_g, 0, trace_length, num_queries, blowup, grinding]

    # Compute trace generator `trace_g` = `lde_g^blowup_factor`
    repeat.3
        dup mul
    end
    #=> [trace_g, 0, trace_length, num_queries, blowup, grinding]

    # Save `trace_g` to memory
    exec.constants::trace_domain_generator_ptr mem_store
    #=> [0, trace_length, num_queries, blowup, grinding]

    # clean satck
    drop
    #=> [trace_length, num_queries, blowup, grinding]

    # Construct the proof context

    ##trace layout info
    push.1208027408

    ##field modulus bytes (2 field elements)
    push.1
    push.4294967295

    ## field extension and FRI parameters
    push.132103


    # Hash proof context
    # Cycles: 15
    swapw
    push.1.0.0.0
    #=> [0, 0, 0, 1, B, A, ..]
    swapw.2
    swapw
    #=> [B, A, 0, 0, 0, 1, ..]
    hperm
    dropw
    dropw
    #=> [C]
end

#! Reseed the random coin with `DATA`
#!
#! Input: [DATA, ...]
#! Ouput: [...]
#! Cycles: 54
export.reseed
    # Load previous state and update it
    # --------------------------------------------------------------------------------------------
    exec.get_rate_1
    # => [R1, DATA, ...] (6 cycles)

    exec.add_two_words
    # => [R1, ...] (16 cycles)

    exec.get_capacity
    swapw
    exec.get_rate_2
    # => [R2, R1, C, ...] (13 cycles)

    hperm
    # => [R2', R1', C', ...] (1 cycles)

    # Save the new state to memory
    # --------------------------------------------------------------------------------------------
    exec.constants::r2_ptr mem_storew
    dropw
    exec.constants::r1_ptr mem_storew
    dropw
    exec.constants::c_ptr mem_storew
    dropw
    # => [...] (18 cycles)
end


# COEFFICIENT GENERATION
# =============================================================================================

#! Generates a `num_tuples` tuples of random field elements and stores them in memory
#! starting from address `dest_ptr`. Each memory address holds two tuples.
#! TODO: Generalize by keeping track of something similar to the `output` variable in `RpoRandomCoin`
#! so that we keep track of already used randomness and know when there is a need to apply `hperm`.
#!
#! Input: [dest_ptr, num_tuples, ...]
#! Output: [...]
#!
#! Cycles: 69 + (22 * num_tuples) / 4
proc.generate_random_coefficients

    # Compute the loop counter. We use checked division to make sure the number is a multiple of 4.
    # If we use field division and num_tuples is not a multiple of 4 then we will enter into
    # a very large loop with high probability.
    push.0 dup movup.2 movup.3
    u32assert u32divmod.4
    assertz
    neg
    #=> [loop_ctr, dest_ptr, x, x, ...]
    # where loop_ctr = - num_tuples / 4; we negate the counter so that we can count up to 0

    exec.get_rate_1
    dup.5 mem_storew

    exec.get_rate_2
    dup.9 add.1 mem_storew
    #=> [R2, R1, loop_ctr, dest_ptr, 0, 0, ..]

    exec.get_capacity
    swapdw
    swapw
    swap add.2 swap
    #=> [loop_ctr, dest_ptr, 0, 0, R1, C, R2, ..]

    add.1 dup neq.0

    while.true

        swapw.3 hperm
        #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...]

        # save R2 to mem[dest+1]; we use dup.13 here because it takes only 1 cycle
        dup.13 add.1 mem_storew
        #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...]

        # save R1 to mem[dest]
        swapw dup.13 mem_storew swapw
        #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...]

        # update destination pointer and loop counter
        swapw.3
        #=> [loop_ctr, dest_ptr, x, x, R1, C, R2, ...]

        swap add.2 swap
        #=> [loop_ctr, dest_ptr+2, x, x, R1, C, R2, ...]

        add.1 dup
        #=> [loop_ctr+1, loop_ctr+1, dest_ptr+2, x, x, R1, C, R2, ...]

        neq.0
    end

    # Save the new state of the random coin
    dropw
    exec.constants::r1_ptr mem_storew
    dropw
    exec.constants::c_ptr mem_storew
    dropw
    exec.constants::r2_ptr mem_storew
    dropw
    #=> [...]
end

#! Generates a `num_tuples` tuples of random field elements and stores them in memory
#! starting from address `dest_ptr`. Each memory address holds one tuple.
#! TODO: Generalize by keeping track of something similar to the `output` variable in `RpoRandomCoin`
#! so that we keep track of already used randomness and know when there is a need to apply `hperm`.
#!
#! Input: [dest_ptr, num_tuples, ...]
#! Output: [...]
#!
#! Cycles: 104 + (69 * num_tuples) / 4
proc.generate_random_coefficients_pad

    # Compute the loop counter. We use checked division to make sure the number is a multiple of 4.
    # If we use field division and num_tuples is not a multiple of 4 then we will enter into
    # a very large loop with high probability.
    push.0 dup movup.2 movup.3
    u32assert u32divmod.4
    assertz
    neg
    #=> [loop_ctr, dest_ptr, x, x, ...]
    # where loop_ctr = - num_tuples / 4; we negate the counter so that we can count up to 0

    exec.get_rate_1
    dup.5
    #=> [dest_ptr, a11, a10, a01, a00, loop_ctr, dest_ptr, x, x, ...]
    dup.4
    dup.4
    push.0.0
    dup.4
    mem_storew
    #=> [0, 0, a01, a00, dest_ptr, a11, a10, a01, a00, loop_ctr, dest_ptr, x, x, ...]

    dropw
    dup.2 dup.2 push.0.0
    movup.4 add.1 mem_storew
    #=> [0, 0, a11, a10, a11, a10, a01, a00, loop_ctr, dest_ptr, x, x, ...]

    exec.constants::r2_ptr mem_loadw
    dup.9  add.4 swap.10
    #=> [dest_ptr, a31, a30, a21, a20, a11, a10, a01, a00, loop_ctr, dest_ptr+4, x, x, ...]

    dup.4
    dup.4
    push.0.0
    dup.4 add.2
    mem_storew
    #=> [0, 0, a21, a20, dest_ptr, a31, a30, a21, a20, a11, a10, a01, a00, loop_ctr, dest_ptr+4, x, x, ...]
    dropw
    dup.2 dup.2 push.0.0
    movup.4 add.3 mem_storew
    #=> [0, 0, a31, a30, a31, a30, a21, a20, a11, a10, a01, a00, loop_ctr, dest_ptr+4, x, x, ...]

    exec.constants::c_ptr mem_loadw
    swapdw
    swapw
    #=> [loop_ctr, dest_ptr, 0, 0, R1, C, R2, ..]

    add.1 dup neq.0

    while.true

        swapw.3 hperm
        #=> [R2, R1, C, loop_ctr, dest_ptr, x, x, ...]

        # save R2 to mem[dest+1]; we use dup.13 here because it takes only 1 cycle
        dup.13
        dup.4 dup.4 push.0.0
        dup.4 add.2
        mem_storew
        #=> [0, 0, a21, a20, dest_ptr, a31, a30, a21, a20, a11, a10, a01, a00, C, loop_ctr, dest_ptr, x, x, ...]
        dropw
        dup.2 dup.2 push.0.0
        movup.4 add.3 mem_storew
        #=> [0, 0, a31, a30, a31, a30, a21, a20, a11, a10, a01, a00, C, loop_ctr, dest_ptr, x, x, ...]

        # save R1 to mem[dest]
        dropw
        swapw dup.13
        dup.4 dup.4 push.0.0
        dup.4
        mem_storew
        #=> [0, 0, a01, a00, dest_ptr, a11, a10, a01, a00, a31, a30, a21, a20, C, loop_ctr, dest_ptr, x, x, ...]
        dropw
        dup.2 dup.2 push.0.0
        movup.4 add.1 mem_storew
        #=> [0, 0, a01, a00, a11, a10, a01, a00, a31, a30, a21, a20, C, loop_ctr, dest_ptr, x, x, ...]

        # reshuffle and update destination pointer and loop counter
        dropw
        swapw
        swapw.3
        #=> [loop_ctr, dest_ptr, x, x, R1, C, R2, ...]

        swap add.4 swap
        #=> [loop_ctr, dest_ptr+2, x, x, R1, C, R2, ...]

        add.1 dup
        #=> [loop_ctr+1, loop_ctr+1, dest_ptr+2, x, x, R1, C, R2, ...]

        neq.0
    end

    # Save the new state of the random coin
    dropw
    exec.constants::r1_ptr mem_storew
    dropw
    exec.constants::c_ptr mem_storew
    dropw
    exec.constants::r2_ptr mem_storew
    dropw
    #=> [...]
end

#! Draw a list of random extension field elements related to the auxiliary trace and store the list
#! in memory from `aux_rand_elem_ptr` to `aux_rand_elem_ptr + 8 - 1`
#!
#! Input: [aux_rand_elem_ptr, ...]
#! Output: [...]
#! Cycles: 150
export.generate_aux_randomness

    push.16 swap
    exec.generate_random_coefficients
    #=> [...]
end

#! Draw constraint composition random coefficients and save them into memory in the region from
#! `compos_coef_ptr` `compos_coef_ptr + 118 - 1` as `(r1_1, r1_0, r0_1, r0_0)`
#!
#! Input: [compos_coef_ptr, ...]
#! Output: [...]
#! Cycles: 1309
export.generate_constraint_composition_coefficients

    push.236
    swap
    exec.generate_random_coefficients
    #=> [...]
end

#! Draw deep composition polynomial random coefficients and save them into memory in the region from
#! `deep_rand_coef_ptr` to `deep_rand_coef_ptr + 89 - 1` as `(0, 0, r0_1, r0_0)`
#! The number of coefficients is equal to:
#! 1. (72 + 9) * 2 Felt for the main and auxiliary traces.
#! 2. 8 * 2 Felt for constraint polynomial.
#! Total: 89 tuples of type (Felt, Felt)
#!
#! Input: [deep_rand_coef_ptr, ...]
#! Output: [...]
#! Cycles: 1693
export.generate_deep_composition_random_coefficients

    push.92
    swap
    exec.generate_random_coefficients_pad
    #=> [...]
end


# OOD POINT GENERATION
# =============================================================================================

#! Generate the OOD challenge point `z = (z0, z1)` and compute `z^N` where N is
#! the trace length. The resulting word `[(z_1, z_0)^N, z1, z0]` is stored in the
#! global memory address `exec.z_ptr` reservedfor it.
#!
#! Input: [X, ...]
#! Output: [...]
#! Note: The top word on the stack is consumed by this procedure.
#! Cycles: 21 + 10 * log(N)
export.generate_z_zN
    # Load z (first two felts of the random coin state) and log trace length N
    exec.constants::r1_ptr mem_loadw
    drop drop
    exec.constants::trace_length_log_ptr mem_load
    # => [log(trace_len), z_1, z_0, ...]

    dup.2 dup.2
    # => [z_1, z_0, log(trace_len), z_1, z_0, ...]

    # Compute z^N using the fact that z^N = z^(2^log(N))
    # Loop starts with `i=log(trace_len)`
    push.1
    while.true
        dup.1 dup.1 ext2mul
        # => [(z_1, z_0)^n, i, z_1, z_0, ...]

        dup.2 sub.1 swap.3 push.1 neq
        # => [b, (z_1, z_0)^n, i-1, z_1, z_0, ...]
    end

    movup.2 drop
    # => [(z_1, z_0)^n, z_1, z_0, ...]

    # Store z and z^N
    exec.constants::z_ptr mem_storew
    dropw
end


# INDEX GENERATION
# =============================================================================================

# Helper function for generating a list of indices that takes a word of random felts and saves
# to memory region referenced by `ptr` 4 random integers in the range 0..=(mask+1).
# `depth` is saved next to each of the 4 integers for use in subsequent steps.
#
# Input: [R, ptr, mask, depth, ...]
# Output:[...]
#
# Cycles: 100
proc.generate_four_integers
    # Get the first random felt
    dup.3               # [r0, R1, ptr, mask, depth, ...]
    u32split swap       # [r0_lo, r0_hi, R1, ptr, mask, depth, ...]
    dup.7               # [mask, r0_lo, r0_hi, R1, ptr, mask, depth, ...]
    u32and              # [r, r0_hi, R1, ptr, mask, depth, ...]
    dup.8 swap          # [r, depth, r0_hi, R1, ptr, mask, depth, ...]
    push.0 movdn.3      # [r, depth, r0_hi, 0, R1, ptr, mask, depth, ...]

    # Store and update pointer
    dup.8 add.1 swap.9  # [ptr, r, depth, r0_hi, 0, R1, ptr + 1, mask, depth, ...]
    mem_storew
    dropw               # [R1, ptr + 1, mask, depth, ...]

    # Get the second random felt
    dup.2               # [r1, R1, ptr, mask, depth, ...]
    u32split swap       # [r1_lo, r1_hi, R1, ptr, mask, depth, ...]
    dup.7               # [mask, r1_lo, r1_hi, R1, ptr, mask, depth, ...]
    u32and              # [r, r1_hi, R1, ptr, mask, depth, ...]
    dup.8 swap          # [r, depth, r1_hi, R1, ptr, mask, depth, ...]
    push.0 movdn.3      # [r, depth, r1_hi, 0, R1, ptr, mask, depth, ...]

    # Store and update pointer
    dup.8 add.1 swap.9  # [ptr, r, depth, r1_hi, 0, R1, ptr + 1, mask, depth, ...]
    mem_storew
    dropw               # [R1, ptr + 1, mask, depth, ...]

    # Get the third random felt
    dup.1
    u32split swap
    dup.7
    u32and
    dup.8 swap
    push.0 movdn.3

    # Store and update pointer
    dup.8 add.1 swap.9
    mem_storew
    dropw

    # Get the fourth random felt
    dup
    u32split swap
    dup.7
    u32and
    dup.8 swap
    push.0 movdn.3

    # Store and update pointer
    dup.8 add.1 swap.9
    mem_storew
    dropw
end

# Helper function for generating a list of indices. It takes a word of random felts and saves
# to a memory region, referenced by `ptr`, 3 random integers in the range 0..=(mask+1). This procedure
# is used to generate a list of random indices that are used in FRI. Moreover, this procedure
# is called first, and right after the PoW check, thus the first element in the rate portion of
# the state is discarded.
# `depth` is saved next to each of the 3 integers for use in subsequent steps.
#
# Input: [R, ptr, mask, depth, ...]
# Output:[R, ptr + 3, mask, depth, ...]
#
# Cycles: 75
proc.generate_three_integers
    # Get the second random felt
    dup.2               # [r0, R1, ptr, mask, depth, ...]
    u32split swap       # [r0_lo, r0_hi, R1, ptr, mask, depth, ...]
    dup.7               # [mask, r0_lo, r0_hi, R1, ptr, mask, depth, ...]
    u32and              # [r, r0_hi, R1, ptr, mask, depth, ...]
    dup.8 swap          # [r, depth, r0_hi, R1, ptr, mask, depth, ...]
    push.0 movdn.3      # [r, depth, r0_hi, 0, R1, ptr, mask, depth, ...]

    # Store and update pointer
    dup.8 add.1 swap.9  # [ptr, r, depth, r0_hi, 0, R1, ptr + 1, mask, depth, ...]
    mem_storew
    dropw               # [R1, ptr + 1, mask, depth, ...]

    # Get the second random felt
    dup.1               # [r1, R1, ptr, mask, depth, ...]
    u32split swap       # [r1_lo, r1_hi, R1, ptr, mask, depth, ...]
    dup.7               # [mask, r1_lo, r1_hi, R1, ptr, mask, depth, ...]
    u32and              # [r, r1_hi, R1, ptr, mask, depth, ...]
    dup.8 swap          # [r, depth, r1_hi, R1, ptr, mask, depth, ...]
    push.0 movdn.3      # [r, depth, r1_hi, 0, R1, ptr, mask, depth, ...]

    # Store and update pointer
    dup.8 add.1 swap.9  # [ptr, r, depth, r1_hi, 0, R1, ptr + 1, mask, depth, ...]
    mem_storew
    dropw               # [R1, ptr + 1, mask, depth, ...]

    # Get the third random felt
    dup.0
    u32split swap
    dup.7
    u32and
    dup.8 swap
    push.0 movdn.3

    # Store and update pointer
    dup.8 add.1 swap.9
    mem_storew
    dropw
end

#! Generate a list of `num_queries` number of random indices in the range
#! [0, lde_size] and store it in memory starting from `query_ptr`.
#! The list is stored as `(r, depth, y, y)` where `depth` is `log(lde_domain_size)`.
#!`depth` is needed when computing the deep queries.
#! TODO: the case of duplicate queries
#!
#! Input: [query_ptr, num_queries, ...]
#! Output: [...]
#!
#! Cycles: 267 + q * 236 + r * 29 where q = num_queries / 8 and r = num_queries % 8
#!
#! NOTE: This procedure is called first, and right after the PoW check, thus the first element
#! in the rate portion of the state is discarded.
#! NOTE: The cycles count can be estimated, using the fact that r < 8, via the more compact formula
#!  470 + 236 * (num_queries / 8)
export.generate_list_indices
    # Create mask
    padw
    exec.constants::lde_size_ptr mem_loadw
    movup.2 drop
    movup.2 drop
    sub.1
    #=> [mask, depth, query_ptr, num_queries] where depth = log(lde_size)

    # Get address holding the integers (this will later hold the FRI queries)
    movup.2
    #=> [query_ptr, mask, depth, num_queries]

    # Load the first half of the rate portion of the state of the random coin. We discard the first
    # element as it is used for PoW and use the remaining the 3.
    exec.get_rate_1
    exec.generate_three_integers

    # Load the second half of the rate portion of the state of the random coin.
    exec.constants::r2_ptr mem_loadw
    exec.generate_four_integers
    #=> [R2, query_ptr, mask, depth, num_queries, ...]

    # Squeeze
    exec.constants::c_ptr mem_loadw
    exec.get_rate_1
    exec.get_rate_2
    hperm

    exec.rpo::squeeze_digest
    #=> [R1, query_ptr, mask, depth, num_queries, ...]


    # Use `num_queries` to iterate.

    ## Subtract the 7 elements we have already generated above.
    movup.7
    push.7 sub

    ## Divide by 8 to get the number of iterations
    u32assert u32divmod.8
    #=> [remainder, quotient, X, query_ptr, mask, depth, ...]

    ## Save remainder for later use
    movdn.8

    ## Use `quotient` to iterate
    dup movdn.8
    push.0 neq
    while.true
        exec.generate_four_integers

        exec.constants::r2_ptr mem_loadw
        exec.generate_four_integers
        #=> [R2, query_ptr, mask, depth, num_queries, ...]

        # Squeeze
        exec.constants::c_ptr mem_loadw
        exec.get_rate_1
        exec.get_rate_2
        hperm

        exec.rpo::squeeze_digest
        #=> [R1, query_ptr, mask, depth, num_queries, ...]
        movup.7 sub.1 dup movdn.8
        push.0 neq
    end


    ## Use remainder

    ### Put the remaining number of queries to generate in the appropriate stack position
    movup.8 movdn.7

    ### Load the second half of the rate portion of the state of the random coin.
    padw exec.constants::r2_ptr mem_loadw
    #=> [R2, R1, query_ptr, mask, depth, num_queries, ...]

    ### Iterate over remainder
    dup.11 sub.1 swap.12
    neq.0
    while.true
        movup.7
        u32split swap       # [r0_lo, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...]
        dup.10              # [mask, r0_lo, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...]
        u32and              # [r, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...]
        dup.11 swap         # [r, depth, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...]
        push.0 movdn.3      # [r, depth, r0_hi, 0, R2, r3, r2, r1, ptr, mask, depth, ...]

        # Store and update pointer
        dup.11 add.1 swap.12  # [ptr, r, depth, r0_hi, 0, R2, r3, r2, r1, ptr + 1, mask, depth, ...]
        mem_storew
        drop drop drop               # [x, R2, r3, r2, r1, ptr + 1, mask, depth, ...]
        dup.11 sub.1 swap.12
        push.0 neq
    end

    dropw dropw  dropw drop
end


# PROOF-OF-WORK CHECK
# =============================================================================================

#! Check that the Proof-of-Work contained in the nonce is equal to the required number
#! of bits prescribed by grinding bits. The grinding factor is assumed to be less than 32.
#!
#! Input: [grinding_factor, ...]
#! Output: [...]
#! Cycles: 73
export.check_pow

    # Compute the mask.
    pow2
    u32assert u32overflowing_sub.1 assertz
    #=> [mask, ...]

    # Load Capacity portion
    exec.get_capacity

    # Load first half of rate portion and add pow witness to first element of rate
    exec.get_rate_1
    adv_push.1
    dup.4
    add
    swap.4
    drop

    # Load the second half of rate portion and apply the permutation
    padw
    exec.constants::r2_ptr mem_loadw
    hperm
    #=> [R2, R1, C, mask, ...]

    # Save the new random coin state
    exec.constants::r2_ptr mem_storew
    dropw
    exec.constants::r1_ptr mem_storew
    swapw
    exec.constants::c_ptr mem_storew
    dropw
    drop drop drop
    #=> [R10, mask]

    # Make sure the PoW is valid
    u32split
    drop
    u32and
    assertz
    drop
    #=> [...]
end