#! Keccak-p[1600, 24] permutation's θ step mapping function, which is implemented
#! in terms of 32-bit word size ( bit interleaved representation )
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L55-L98 for original implementation
#!
#! Expected stack state :
#!
#! [state_addr, ...]
#!
#! Final stack state :
#!
#! [ ... ]
#!
#! Whole keccak-p[1600, 24] state can be represented using fifty u32 elements i.e. 13 absolute memory addresses
#! s.t. last two elements of 12-th ( when indexed from zero ) memory address are zeroed.
#!
#! Consecutive memory addresses can be computed by repeated application of `add.1`.
#!
#! Local memory usage :
#!
#! local 0 : state_addr ( reloaded before every column sum and before the final xor pass )
#! local 1 : [c0, c1, c2, c3]
#! local 2 : [c4, c5, c6, c7]
proc.theta.3
    # remember state_addr in local 0; one copy stays on the stack
    dup locaddr.0 mem_store

    # compute (S[0] ^ S[10] ^ S[20] ^ S[30] ^ S[40], S[1] ^ S[11] ^ S[21] ^ S[31] ^ S[41])

    # bring S[0], S[1] ( upper half of the loaded word is kept )
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.2 add.2

    # bring S[10], S[11] ( lower half of the loaded word is kept )
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[20], S[21]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[30], S[31]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[40], S[41] ( address is consumed here, no `dup` )
    push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.2 u32xor swap movup.2 u32xor swap

    # stack = [c0, c1]

    # compute (S[2] ^ S[12] ^ S[22] ^ S[32] ^ S[42], S[3] ^ S[13] ^ S[23] ^ S[33] ^ S[43])
    locaddr.0 mem_load

    # bring S[2], S[3]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.2 add.3

    # bring S[12], S[13]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[22], S[23]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[32], S[33]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[42], S[43]
    push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.2 u32xor swap movup.2 u32xor swap

    # stack = [c2, c3, c0, c1]; bring c0, c1 back to the top
    movup.3 movup.3

    # stack = [c0, c1, c2, c3]
    locaddr.1 mem_storew dropw

    # compute (S[4] ^ S[14] ^ S[24] ^ S[34] ^ S[44], S[5] ^ S[15] ^ S[25] ^ S[35] ^ S[45])
    locaddr.0 mem_load add.1

    # bring S[4], S[5]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.2 add.2

    # bring S[14], S[15]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[24], S[25]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[34], S[35]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[44], S[45]
    push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.2 u32xor swap movup.2 u32xor swap

    # stack = [c4, c5]

    # compute (S[6] ^ S[16] ^ S[26] ^ S[36] ^ S[46], S[7] ^ S[17] ^ S[27] ^ S[37] ^ S[47])
    locaddr.0 mem_load add.1

    # bring S[6], S[7]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.2 add.3

    # bring S[16], S[17]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[26], S[27]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[36], S[37]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[46], S[47]
    push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.2 u32xor swap movup.2 u32xor swap

    # stack = [c6, c7, c4, c5]; bring c4, c5 back to the top
    movup.3 movup.3

    # stack = [c4, c5, c6, c7]
    locaddr.2 mem_storew dropw

    # compute (S[8] ^ S[18] ^ S[28] ^ S[38] ^ S[48], S[9] ^ S[19] ^ S[29] ^ S[39] ^ S[49])
    locaddr.0 mem_load add.2

    # bring S[8], S[9]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.2 add.2

    # bring S[18], S[19]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[28], S[29]
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.2

    # bring S[38], S[39]
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 u32xor swap movup.3 u32xor swap
    movup.2 add.3

    # bring S[48], S[49]
    push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.2 u32xor swap movup.2 u32xor swap

    # stack = [c8, c9]

    # reload [c4, c5, c6, c7] and then [c0, c1, c2, c3] on top of it
    locaddr.2 push.0.0.0.0 movup.4 mem_loadw
    locaddr.1 push.0.0.0.0 movup.4 mem_loadw

    # stack = [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9]

    # d0 = c8 ^ rotl(c3, 1)
    dup.8 dup.4 u32rotl.1 u32xor
    # d1 = c9 ^ c2
    dup.10 dup.4 u32xor
    # d2 = c0 ^ rotl(c5, 1)
    dup.2 dup.8 u32rotl.1 u32xor
    # d3 = c1 ^ c4
    dup.4 dup.8 u32xor
    # d4 = c2 ^ rotl(c7, 1) ( from here on c-words are consumed once no longer needed )
    movup.6 dup.11 u32rotl.1 u32xor
    # d5 = c3 ^ c6
    movup.7 dup.10 u32xor
    # d6 = c4 ^ rotl(c9, 1)
    movup.8 movup.13 u32rotl.1 u32xor
    # d7 = c5 ^ c8
    movup.9 movup.12 u32xor
    # d8 = c6 ^ rotl(c1, 1)
    movup.10 movup.10 u32rotl.1 u32xor
    # d9 = c7 ^ c0
    movup.10 movup.10 u32xor

    # stack = [d9, d8, d7, d6, d5, d4, d3, d2, d1, d0]

    # reverse the ten elements
    swap movup.2 movup.3 movup.4 movup.5 movup.6 movup.7 movup.8 movup.9

    # stack = [d0, d1, d2, d3, d4, d5, d6, d7, d8, d9]

    locaddr.0 mem_load

    # stack = [state_addr, d0, ..., d9]; while a state word is loaded d0 sits at depth 5
    # and d9 at depth 14; state[i] is xored with d(i mod 10)

    # compute state[0..4)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.5 u32xor swap dup.6 u32xor swap
    movup.2 dup.7 u32xor movdn.2
    movup.3 dup.8 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[4..8)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.9 u32xor swap dup.10 u32xor swap
    movup.2 dup.11 u32xor movdn.2
    movup.3 dup.12 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[8..12)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.13 u32xor swap dup.14 u32xor swap
    movup.2 dup.5 u32xor movdn.2
    movup.3 dup.6 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[12..16)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.7 u32xor swap dup.8 u32xor swap
    movup.2 dup.9 u32xor movdn.2
    movup.3 dup.10 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[16..20)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.11 u32xor swap dup.12 u32xor swap
    movup.2 dup.13 u32xor movdn.2
    movup.3 dup.14 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[20..24)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.5 u32xor swap dup.6 u32xor swap
    movup.2 dup.7 u32xor movdn.2
    movup.3 dup.8 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[24..28)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.9 u32xor swap dup.10 u32xor swap
    movup.2 dup.11 u32xor movdn.2
    movup.3 dup.12 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[28..32)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.13 u32xor swap dup.14 u32xor swap
    movup.2 dup.5 u32xor movdn.2
    movup.3 dup.6 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[32..36)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.7 u32xor swap dup.8 u32xor swap
    movup.2 dup.9 u32xor movdn.2
    movup.3 dup.10 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[36..40)
    dup push.0.0.0.0 movup.4 mem_loadw
    dup.11 u32xor swap dup.12 u32xor swap
    movup.2 dup.13 u32xor movdn.2
    movup.3 dup.14 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[40..44) ( last use of d0..d3, so they are moved instead of duplicated )
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.5 u32xor swap movup.5 u32xor swap
    movup.2 movup.5 u32xor movdn.2
    movup.3 movup.5 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[44..48) ( consumes d4..d7 )
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.5 u32xor swap movup.5 u32xor swap
    movup.2 movup.5 u32xor movdn.2
    movup.3 movup.5 u32xor movdn.3
    dup.4 mem_storew dropw add.1

    # compute state[48..50) ( consumes d8, d9 and the address )
    dup push.0.0.0.0 movup.4 mem_loadw
    movup.5 u32xor swap movup.5 u32xor swap
    movup.4 mem_storew dropw
end

#! Keccak-p[1600, 24] permutation's ρ step mapping function, which is implemented
#! in terms of 32-bit word size ( bit interleaved representation )
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L115-L147 for original implementation
#!
#! Expected stack state :
#!
#! [state_addr, ...]
#!
#! Final stack state :
#!
#! [ ... ]
#!
#! Whole keccak-p[1600, 24] state can be represented using fifty u32 elements i.e. 13 absolute memory addresses
#! s.t. last two elements of 12-th ( when indexed from zero ) memory address are zeroed.
#!
#! Consecutive memory addresses can be computed by repeated application of `add.1`.
proc.rho
    # Stack on entry : [state_addr, ...], stack on exit : [ ... ]
    #
    # No procedure local is needed: the address of the word being processed always lives
    # right below the four working elements, so the former `dup locaddr.0 mem_store`
    # ( whose stored value was never read back ) has been removed.
    #
    # Each 64-bit lane is kept as an ( even, odd ) pair of u32 halves. Rotating a lane
    # left by 2k rotates both halves by k; rotating it by 2k + 1 yields
    # even' = rotl(odd, k + 1) and odd' = rotl(even, k), i.e. the halves trade places.
    #
    # After every step ( but the last ) stack = [w0, w1, w2, w3, next_addr, ...], where the
    # four top elements are leftovers which get overwritten by the following `mem_loadw`.

    # rotate state[0..4) : lane 0 by 0 ( untouched ), lane 1 by 1
    push.0.0.0.0 dup.4 mem_loadw
    movup.3 u32rotl.1 movdn.2
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[4..8) : lane 2 by 62, lane 3 by 28
    dup.4 mem_loadw
    u32rotl.31 swap u32rotl.31 swap
    movup.2 u32rotl.14 movdn.2
    movup.3 u32rotl.14 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[8..12) : lane 4 by 27, lane 5 by 36
    dup.4 mem_loadw
    u32rotl.13 swap u32rotl.14
    movup.2 u32rotl.18 movdn.2
    movup.3 u32rotl.18 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[12..16) : lane 6 by 44, lane 7 by 6
    dup.4 mem_loadw
    u32rotl.22 swap u32rotl.22 swap
    movup.2 u32rotl.3 movdn.2
    movup.3 u32rotl.3 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[16..20) : lane 8 by 55, lane 9 by 20
    dup.4 mem_loadw
    u32rotl.27 swap u32rotl.28
    movup.2 u32rotl.10 movdn.2
    movup.3 u32rotl.10 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[20..24) : lane 10 by 3, lane 11 by 10
    dup.4 mem_loadw
    u32rotl.1 swap u32rotl.2
    movup.2 u32rotl.5 movdn.2
    movup.3 u32rotl.5 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[24..28) : lane 12 by 43, lane 13 by 25
    dup.4 mem_loadw
    u32rotl.21 swap u32rotl.22
    movup.2 u32rotl.12 movdn.3
    movup.2 u32rotl.13 movdn.2
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[28..32) : lane 14 by 39, lane 15 by 41
    dup.4 mem_loadw
    u32rotl.19 swap u32rotl.20
    movup.2 u32rotl.20 movdn.3
    movup.2 u32rotl.21 movdn.2
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[32..36) : lane 16 by 45, lane 17 by 15
    dup.4 mem_loadw
    u32rotl.22 swap u32rotl.23
    movup.2 u32rotl.7 movdn.3
    movup.2 u32rotl.8 movdn.2
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[36..40) : lane 18 by 21, lane 19 by 8
    dup.4 mem_loadw
    u32rotl.10 swap u32rotl.11
    movup.2 u32rotl.4 movdn.2
    movup.3 u32rotl.4 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[40..44) : lane 20 by 18, lane 21 by 2
    dup.4 mem_loadw
    u32rotl.9 swap u32rotl.9 swap
    movup.2 u32rotl.1 movdn.2
    movup.3 u32rotl.1 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[44..48) : lane 22 by 61, lane 23 by 56
    dup.4 mem_loadw
    u32rotl.30 swap u32rotl.31
    movup.2 u32rotl.28 movdn.2
    movup.3 u32rotl.28 movdn.3
    movup.4 dup add.1 movdn.5 mem_storew

    # rotate state[48..50) : lane 24 by 14; the address is consumed, nothing is left behind
    dup.4 mem_loadw
    u32rotl.7 swap u32rotl.7 swap
    movup.4 mem_storew dropw
end
#! Keccak-p[1600, 24] permutation's π step mapping function, which is implemented
#! in terms of 32-bit word size ( bit interleaved representation )
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L169-L207 for original implementation
#!
#! Expected stack state :
#!
#! [state_addr, ...]
#!
#! Final stack state :
#!
#! [ ... ]
#!
#! Whole keccak-p[1600, 24] state can be represented using fifty u32 elements i.e. 13 absolute memory addresses
#! s.t. last two elements of 12-th ( when indexed from zero ) memory address are zeroed.
#!
#! Consecutive memory addresses can be computed by repeated application of `add.1`.
#!
#! Local memory usage :
#!
#! local 0       : state_addr
#! local 1 .. 13 : scratch copy of the permuted state, addressed as `locaddr.1 + k`
#!                 ( assumes procedure locals occupy consecutive ascending addresses — TODO confirm
#!                 against the targeted Miden VM version )
proc.pi.14
    dup locaddr.0 mem_store

    locaddr.1 swap push.0.0.0.0

    # stack = [0, 0, 0, 0, state_addr, scratch_addr]
    #
    # Throughout the placement steps the source word address stays at depth 4 and
    # scratch_addr at depth 5 whenever exactly one word is on top of them. When a source
    # word carries two lanes going to different scratch words, the destination word is
    # first written with zero padding and completed later by a load / merge / store.

    # place state[0..4) to desired location(s)
    dup.4 mem_loadw
    push.0.0 movdn.3 movdn.3
    dup.7 mem_storew
    drop drop
    movdn.3 movdn.3
    dup.5 add.5 mem_storew

    # place state[4..8) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0 movdn.3 movdn.3
    dup.7 add.10 mem_storew
    drop drop
    dup.5 add.2 mem_storew

    # place state[8..12) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0
    dup.7 add.7 mem_storew
    movup.2 drop movup.2 drop
    movdn.3 movdn.3
    dup.5 add.8 mem_storew

    # place state[12..16) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    dup.7 mem_storew
    dup.7 add.5 mem_loadw
    movup.2 drop movup.2 drop
    dup.5 add.5 mem_storew

    # place state[16..20) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.10 push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    dup.7 add.10 mem_storew
    dropw
    push.0.0 movdn.3 movdn.3
    dup.5 add.3 mem_storew

    # place state[20..24) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.3 push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    dup.7 add.3 mem_storew
    dup.7 add.8 mem_loadw
    movup.2 drop movup.2 drop
    dup.5 add.8 mem_storew

    # place state[24..28) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0 movdn.3 movdn.3
    dup.7 add.1 mem_storew
    drop drop
    movdn.3 movdn.3
    dup.5 add.6 mem_storew

    # place state[28..32) to desired location(s) ( whole word moves as-is )
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.11 mem_storew

    # place state[32..36) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0 movdn.3 movdn.3
    dup.7 add.4 mem_storew
    drop drop
    movdn.3 movdn.3
    dup.5 add.9 mem_storew

    # place state[36..40) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.1 push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    dup.7 add.1 mem_storew
    dup.7 add.6 mem_loadw
    movup.2 drop movup.2 drop
    dup.5 add.6 mem_storew

    # place state[40..44) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.7 push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movup.3 movup.3
    dup.7 add.7 mem_storew
    dropw
    push.0.0 movdn.3 movdn.3
    dup.5 add.12 mem_storew

    # place state[44..48) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.4 push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    dup.7 add.4 mem_storew
    dup.7 add.9 mem_loadw
    movup.2 drop movup.2 drop
    dup.5 add.9 mem_storew

    # place state[48..50) to desired location(s)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    dup.5 add.2 push.0.0.0.0 movup.4 mem_loadw
    drop drop
    movdn.3 movdn.3
    dup.7 add.2 mem_storew
    drop drop

    # memcpy : copy the 13 scratch words back over the state

    # replace the exhausted source address with the original state_addr
    movup.4 drop
    locaddr.0 mem_load movdn.4

    # stack = [x, x, x, x, state_addr, scratch_addr] ( x = leftovers, overwritten on load )
    repeat.13
        dup.5 mem_loadw
        dup.4 mem_storew
        movup.4 add.1 movdn.4
        movup.5 add.1 movdn.5
    end

    dropw drop drop
end

#! Keccak-p[1600, 24] permutation's χ step mapping function, which is implemented
#! in terms of 32-bit word size ( bit interleaved representation )
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L233-L271 for original implementation
#!
#! Expected stack state :
#!
#! [state_addr, ...]
#!
#! Final stack state :
#!
#! [ ... ]
#!
#! Whole keccak-p[1600, 24] state can be represented using fifty u32 elements i.e.
#! 13 absolute memory addresses
#! s.t. last two elements of 12-th ( when indexed from zero ) memory address are zeroed.
#!
#! Consecutive memory addresses can be computed by repeated application of `add.1`.
#!
#! Local memory usage :
#!
#! local 0     : state_addr
#! local 1 - 3 : per row, the `~next & next_next` terms for the ( up to ) three state words the
#!               row touches; positions belonging to a neighbouring row are filled with zeros so
#!               that xoring a whole word leaves them unchanged
proc.chi.4
    dup locaddr.0 mem_store

    # process state[0..10)

    # terms for state[0..4) : ~S[2..4) & S[4..6) and ~S[4..6) & S[6..8)
    dup push.0.0.0.0 movup.4 mem_loadw
    drop drop
    u32not swap u32not swap
    movup.2 add.1 dup movdn.3
    push.0.0.0.0 movup.4 mem_loadw
    dup.1 dup.1
    movup.6 u32and swap movup.6 u32and swap
    movup.3 u32not movup.3 u32not
    movup.4 u32and swap movup.4 u32and swap
    movup.3 movup.3
    locaddr.1 mem_storew

    # terms for state[4..8) : ~S[6..8) & S[8..10) and ~S[8..10) & S[0..2)
    dup.4 mem_loadw
    drop drop
    u32not swap u32not swap
    movup.2 add.1 dup movdn.3
    push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    dup.1 dup.1
    movup.4 u32and swap movup.4 u32and swap
    movup.3 movup.3
    movup.4 sub.2
    push.0.0.0.0 movup.4 mem_loadw
    movup.5 u32not movup.5 u32not
    dup.2 u32and swap dup.3 u32and swap
    movup.7 movup.7
    locaddr.2 mem_storew dropw

    # terms for state[8..10) : ~S[0..2) & S[2..4), kept on the stack
    u32not swap u32not swap
    movup.2 u32and swap movup.2 u32and swap

    # xor the terms into state[0..4)
    locaddr.0 mem_load
    push.0.0.0.0 dup.4 mem_loadw
    locaddr.1 push.0.0.0.0 movup.4 mem_loadw
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    # xor the terms into state[4..8)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    locaddr.2 push.0.0.0.0 movup.4 mem_loadw
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    # xor the terms into state[8..10)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    movup.5 u32xor swap movup.5 u32xor swap
    dup.4 mem_storew

    # process state[10..20)

    # terms for state[10..12) : ~S[12..14) & S[14..16), zero padded for state[8..10)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    push.0.0
    locaddr.1 mem_storew

    # terms for state[12..16) : ~S[14..16) & S[16..18) and ~S[16..18) & S[18..20)
    movup.6 add.1 dup movdn.7 mem_loadw
    movup.5 movup.5
    u32not swap u32not swap
    dup.2 u32and swap dup.3 u32and swap
    movup.3 movup.3
    u32not swap u32not swap
    dup.4 u32and swap dup.5 u32and swap
    movup.3 movup.3
    locaddr.2 mem_storew

    # terms for state[16..20) : ~S[18..20) & S[10..12) and ~S[10..12) & S[12..14)
    movup.6 sub.2 dup movdn.7 mem_loadw
    drop drop
    dup.1 dup.1
    movup.4 u32not movup.5 u32not swap
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    movup.4 add.1
    push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 movup.3
    u32not swap u32not swap
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    locaddr.3 mem_storew

    # xor the terms into state[8..12), state[12..16), state[16..20)
    locaddr.0 mem_load add.2 dup movdn.5 mem_loadw
    push.0.0.0.0 loc_loadw.1
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.2
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.3
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    # process state[20..30)

    # terms for state[20..24) : ~S[22..24) & S[24..26) and ~S[24..26) & S[26..28)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    drop drop
    u32not swap u32not swap
    movup.2 add.1 movdn.2 dup.2
    push.0.0.0.0 movup.4 mem_loadw
    dup.1 dup.1
    movup.6 u32and swap movup.6 u32and swap
    movup.3 movup.3
    u32not swap u32not swap
    dup.4 u32and swap dup.5 u32and swap
    movup.3 movup.3
    loc_storew.1

    # terms for state[24..28) : ~S[26..28) & S[28..30) and ~S[28..30) & S[20..22)
    movup.6 add.1 movdn.6 dup.6 mem_loadw
    movup.2 drop movup.2 drop
    dup.1 dup.1
    movup.5 movup.5
    u32not swap u32not swap
    movup.2 u32and swap movup.2 u32and swap
    movup.4 sub.2 movdn.4 dup.4
    push.0.0.0.0 movup.4 mem_loadw
    movup.7 movup.7
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    movup.7 movup.7
    loc_storew.2 dropw

    # terms for state[28..30) : ~S[20..22) & S[22..24), zero padded for state[30..32)
    u32not swap u32not swap
    movup.2 u32and swap movup.2 u32and swap
    push.0.0 movdn.3 movdn.3
    loc_storew.3

    # xor the terms into state[20..24), state[24..28), state[28..32)
    dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.1
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.2
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.3
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    # process state[30..40)

    # terms for state[30..32) : ~S[32..34) & S[34..36), zero padded for state[28..30)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    push.0.0
    loc_storew.1

    # terms for state[32..36) : ~S[34..36) & S[36..38) and ~S[36..38) & S[38..40)
    movup.6 add.1 movdn.6 dup.6 mem_loadw
    movup.5 movup.5
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    u32not swap u32not swap
    dup.5 dup.5
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    loc_storew.2

    # terms for state[36..40) : ~S[38..40) & S[30..32) and ~S[30..32) & S[32..34)
    movup.6 sub.2 movdn.6 dup.6 mem_loadw
    drop drop
    movup.3 movup.3
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    movup.4 add.1 movdn.4 dup.4
    push.0.0.0.0 movup.4 mem_loadw
    movup.2 drop movup.2 drop
    movup.5 movup.5
    u32not swap u32not swap
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    loc_storew.3

    # xor the terms into state[28..32), state[32..36), state[36..40)
    movup.4 sub.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.1
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.2
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.3
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    # process state[40..50)

    # terms for state[40..44) : ~S[42..44) & S[44..46) and ~S[44..46) & S[46..48)
    movup.4 add.1 movdn.4 dup.4 mem_loadw
    drop drop
    movup.2 add.1 movdn.2 dup.2
    push.0.0.0.0 movup.4 mem_loadw
    movup.5 movup.5
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    u32not swap u32not swap
    dup.5 dup.5
    movup.2 u32and swap movup.2 u32and swap
    movup.3 movup.3
    loc_storew.1

    # terms for state[44..48) : ~S[46..48) & S[48..50) and ~S[48..50) & S[40..42)
    movup.6 add.1 movdn.6 dup.6 mem_loadw
    movup.2 drop movup.2 drop
    movup.3 movup.3
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    movup.4 sub.2 movdn.4 dup.4
    push.0.0.0.0 movup.4 mem_loadw
    movup.7 movup.7
    u32not swap u32not swap
    dup.3 dup.3
    movup.2 u32and swap movup.2 u32and swap
    movup.7 movup.7
    loc_storew.2 dropw

    # terms for state[48..50) : ~S[40..42) & S[42..44), zero padded for the two unused elements
    u32not swap u32not swap
    movup.2 u32and swap movup.2 u32and swap
    push.0.0 movdn.3 movdn.3
    loc_storew.3

    # xor the terms into state[40..44), state[44..48), state[48..52)
    dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.1
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.2
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    movup.4 add.1 movdn.4 dup.4 mem_loadw
    push.0.0.0.0 loc_loadw.3
    movup.4 u32xor swap movup.4 u32xor swap
    movup.2 movup.4 u32xor movdn.2 movup.3 movup.4 u32xor movdn.3
    dup.4 mem_storew

    # drop the last word and the address
    dropw drop
end

#! Keccak-p[1600, 24] permutation's ι ( iota ) function, which is
#! implemented in terms of 32-bit word size ( bit interleaved form );
#! imagine https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L288-L306
#! invoked with (c0, c1) as template arguments
#!
#! Expected stack state :
#!
#! [state_addr, c0, c1, ...]
#!
#! Final stack state :
#!
#! [ ... ]
#!
#! All this routine does is
#!
#! state[0] ^= c0
#! state[1] ^= c1
proc.iota
    # stack after load = [s0, s1, s2, s3, state_addr, c0, c1, ...]
    dup push.0.0.0.0 movup.4 mem_loadw

    # s0 ^= c0, s1 ^= c1; s2, s3 are written back untouched
    movup.5 u32xor swap
    movup.5 u32xor swap

    movup.4 mem_storew dropw
end

#! Keccak-p[1600, 24] permutation round, without `iota` function ( all other
#! functions i.e. `theta`, `rho`, `pi`, `chi` are applied in order )
#!
#! As `iota` function involves xoring constant factors with first lane of state array
#! ( read state[0, 0] ), it's required to invoke them separately after completion of
#! this procedure's execution.
#!
#! Expected stack state :
#!
#! [start_addr, ... ]
#!
#! After finishing execution, stack looks like
#!
#! [ ... ]
#!
#! Whole keccak-p[1600, 24] state can be represented using fifty u32 elements i.e. 13 absolute memory addresses
#! s.t. last two elements of 12-th ( when indexed from zero ) memory address are zeroed.
#!
#! Consecutive memory addresses can be computed by repeated application of `add.1`.
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L325-L340
proc.round
    # each step consumes one copy of the state address; `chi` consumes the last one
    dup exec.theta
    dup exec.rho
    dup exec.pi
    exec.chi
end

#! Keccak-p[1600, 24] permutation, applying 24 rounds on state array of size 5 x 5 x 64,
#! where each 64-bit lane is represented in bit interleaved form ( in terms of two 32-bit words ).
#!
#! Expected stack state :
#!
#! [start_addr, ... ]
#!
#! After finishing execution, stack looks like
#!
#! [ ... ]
#!
#! Whole keccak-p[1600, 24] state can be represented using fifty u32 elements i.e. 13 absolute memory addresses
#! s.t. last two elements of 12-th ( when indexed from zero ) memory address are zeroed.
#!
#! Consecutive memory addresses can be computed by repeated application of `add.1`.
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/sha3.hpp#L379-L427
proc.keccak_p
    # Every round follows the same shape :
    #
    #   dup exec.round       -> theta, rho, pi, chi on the state
    #   push.<c1>.<c0>       -> round constant halves; the last pushed value ends up on top,
    #                           so after `dup.2` the stack is [start_addr, c0, c1, start_addr, ...]
    #   dup.2 exec.iota      -> state[0] ^= c0, state[1] ^= c1
    #
    # The final round uses `movup.2` instead of `dup.2` so that start_addr is consumed.

    # permutation round 1
    dup exec.round
    push.0.1 dup.2 exec.iota

    # permutation round 2
    dup exec.round
    push.137.0 dup.2 exec.iota

    # permutation round 3
    dup exec.round
    push.2147483787.0 dup.2 exec.iota

    # permutation round 4
    dup exec.round
    push.2147516544.0 dup.2 exec.iota

    # permutation round 5
    dup exec.round
    push.139.1 dup.2 exec.iota

    # permutation round 6
    dup exec.round
    push.32768.1 dup.2 exec.iota

    # permutation round 7
    dup exec.round
    push.2147516552.1 dup.2 exec.iota

    # permutation round 8
    dup exec.round
    push.2147483778.1 dup.2 exec.iota

    # permutation round 9
    dup exec.round
    push.11.0 dup.2 exec.iota

    # permutation round 10
    dup exec.round
    push.10.0 dup.2 exec.iota

    # permutation round 11
    dup exec.round
    push.32898.1 dup.2 exec.iota

    # permutation round 12
    dup exec.round
    push.32771.0 dup.2 exec.iota

    # permutation round 13
    dup exec.round
    push.32907.1 dup.2 exec.iota

    # permutation round 14
    dup exec.round
    push.2147483659.1 dup.2 exec.iota

    # permutation round 15
    dup exec.round
    push.2147483786.1 dup.2 exec.iota

    # permutation round 16
    dup exec.round
    push.2147483777.1 dup.2 exec.iota

    # permutation round 17
    dup exec.round
    push.2147483777.0 dup.2 exec.iota

    # permutation round 18
    dup exec.round
    push.2147483656.0 dup.2 exec.iota

    # permutation round 19
    dup exec.round
    push.131.0 dup.2 exec.iota

    # permutation round 20
    dup exec.round
    push.2147516419.0 dup.2 exec.iota

    # permutation round 21
    dup exec.round
    push.2147516552.1 dup.2 exec.iota

    # permutation round 22
    dup exec.round
    push.2147483784.0 dup.2 exec.iota

    # permutation round 23
    dup exec.round
    push.32768.1 dup.2 exec.iota

    # permutation round 24
    dup exec.round
    push.2147516546.0 movup.2 exec.iota
end

#! Given two 32-bit unsigned integers ( standard form ), representing upper and lower
#! bits of a 64-bit unsigned integer ( actually a keccak-[1600, 24] lane ),
#! this function converts them into bit interleaved representation, where two 32-bit
#! unsigned integers ( even portion & then odd portion ) hold bits in even and odd
#! indices of 64-bit unsigned integer ( remember it's represented in terms of
#! two 32-bit elements )
#!
#! Input stack state :
#!
#! [hi, lo, ...]
#!
#! After application of bit interleaving, stack looks like
#!
#! [even, odd, ...]
#!
#! Read more about bit interleaved representation in section 2.1 of https://keccak.team/files/Keccak-implementation-3.2.pdf
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/utils.hpp#L123-L149
#! for reference implementation in higher level language.
export.to_bit_interleaved
    # two accumulators : stack = [even, odd, hi, lo, ...]
    push.0.0

    repeat.16
        # make room : bits inserted earlier travel towards the low end
        u32shr.1 swap u32shr.1 swap

        # ---

        # even |= ( hi & 1 ) << 31 | ( lo & 1 ) << 15
        dup.3 dup.3
        push.1 u32and swap push.1 u32and swap
        u32shl.31 swap u32shl.15 swap
        u32xor u32xor

        # ---

        # odd |= ( hi & 2 ) << 30 | ( lo & 2 ) << 14
        dup.3 dup.3
        push.2 u32and swap push.2 u32and swap
        u32shl.30 swap u32shl.14 swap
        movup.3 u32xor u32xor
        swap

        # ---

        # two bits of each input word have been consumed
        movup.2 u32shr.2 movdn.2
        movup.3 u32shr.2 movdn.3
    end

    # drop what is left of hi, lo
    movup.2 drop movup.2 drop
end
#! Given two 32-bit unsigned integers ( in bit interleaved form ), representing even and odd
#! positioned bits of a 64-bit unsigned integer ( actually a keccak-[1600, 24] lane ),
#! this function converts them into standard representation, where two 32-bit
#! unsigned integers hold higher ( 32-bit ) and lower ( 32-bit ) bits of standard
#! representation of 64-bit unsigned integer
#!
#! Input stack state :
#!
#! [even, odd, ...]
#!
#! After application of logic, stack looks like
#!
#! [hi, lo, ...]
#!
#! This function reverts the action done by `to_bit_interleaved` function implemented above.
#!
#! Read more about bit interleaved representation in section 2.1 of https://keccak.team/files/Keccak-implementation-3.2.pdf
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/utils.hpp#L151-L175
#! for reference implementation in higher level language.
export.from_bit_interleaved
    # two accumulators : stack = [hi, lo, even, odd, ...]
    push.0.0

    repeat.16
        # make room : bits inserted earlier travel towards the low end
        u32shr.2 swap u32shr.2 swap

        # ---

        # lo |= ( odd & 1 ) << 31 | ( even & 1 ) << 30
        dup.3 dup.3
        push.1 u32and swap push.1 u32and
        u32shl.31 swap u32shl.30
        u32xor
        movup.2 u32xor
        swap

        # ---

        # hi |= ( odd & 65536 ) << 15 | ( even & 65536 ) << 14 ( i.e. bit 16 lands on bit 31 / 30 )
        dup.3 dup.3
        push.65536 u32and swap push.65536 u32and
        u32shl.15 swap u32shl.14
        u32xor u32xor

        # ---

        # one bit of each input word has been consumed from both of its halves
        movup.2 u32shr.1 movdn.2
        movup.3 u32shr.1 movdn.3
    end

    # drop what is left of even, odd
    movup.2 drop movup.2 drop
end

#! Given 64-bytes input ( in terms of sixteen u32 elements on stack top ) to 2-to-1
#! keccak256 hash function, this function prepares 5 x 5 x 64 keccak-p[1600, 24] state
#! bit array such that each of twenty five 64-bit wide lane is represented in bit
#! interleaved form, using two 32-bit integers. After completion of execution of
#! this function, state array should live in allocated memory ( total fifty u32 elements, stored in
#! 13 consecutive memory addresses s.t. starting absolute address is provided ).
#!
#! Input stack state :
#!
#! [state_addr, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, ...]
#!
#! Note, state_addr is the starting absolute memory address where keccak-p[1600, 24] state
#! is kept. Consecutive addresses can be computed by repeated application of `add.1` instruction.
#!
#! Final stack state :
#!
#! [...]
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/keccak_256.hpp#L73-L153
proc.to_state_array
    # first 4 state words : the 8 input lanes, two lanes ( four u32 inputs ) per word
    repeat.4
        # stack = [a_i, a_i+1, a_i+2, a_i+3, addr, ...]
        movdn.4

        exec.to_bit_interleaved

        movup.3 movup.3

        exec.to_bit_interleaved

        # restore lane order before storing
        movup.3 movup.3

        dup.4 mem_storew dropw
        add.1
    end

    # remaining 9 state words are constants; the two non-zero ones are presumably the
    # keccak256 padding bits for a 64-byte message — NOTE(review): confirm against the
    # referenced keccak_256.hpp

    # state word 4 : first element ( stack top ) is 1
    push.0.0.0.1 dup.4 mem_storew dropw
    add.1

    # state words 5, 6, 7 : all zero
    push.0.0.0.0 dup.4 mem_storew dropw
    add.1

    push.0.0.0.0 dup.4 mem_storew dropw
    add.1

    push.0.0.0.0 dup.4 mem_storew dropw
    add.1

    # state word 8 : second element is 2147483648 ( = 1 << 31 )
    push.0.0.2147483648.0 dup.4 mem_storew dropw
    add.1

    # state words 9, 10, 11 : all zero
    push.0.0.0.0 dup.4 mem_storew dropw
    add.1

    push.0.0.0.0 dup.4 mem_storew dropw
    add.1

    push.0.0.0.0 dup.4 mem_storew dropw
    add.1

    # state word 12 : all zero; the address is consumed
    push.0.0.0.0 movup.4 mem_storew dropw
end

#! Given 32-bytes digest ( in terms of eight u32 elements on stack top ) in bit interleaved form,
#! this function attempts to convert those into standard representation, where eight u32 elements
#! live on stack top, each pair of them hold higher and lower bits of 64-bit unsigned
#! integer ( lane of keccak-p[1600, 24] state array )
#!
#! Input stack state :
#!
#! [lane0_even, lane0_odd, lane1_even, lane1_odd, lane2_even, lane2_odd, lane3_even, lane3_odd, ...]
#!
#! Output stack state :
#!
#! [dig0_hi, dig0_lo, dig1_hi, dig1_lo, dig2_hi, dig2_lo, dig3_hi, dig3_lo, ...]
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/keccak_256.hpp#L180-L209
proc.to_digest
    # lanes are converted last-to-first; each converted pair is left on top, so that after
    # four iterations lane 0 ends up on the stack top again
    repeat.4
        movup.7 movup.7

        exec.from_bit_interleaved
    end
end

#! Given 64-bytes input, in terms of sixteen 32-bit unsigned integers, where each pair
#! of them holding higher & lower 32-bits of 64-bit unsigned integer ( reinterpreted on
#! host CPU from little endian byte array ) respectively, this function computes 32-bytes
#! keccak256 digest, held on stack top, represented in terms of eight 32-bit unsigned integers,
#! where each pair of them keeps higher and lower 32-bits of 64-bit unsigned integer respectively
#!
#! Expected stack state :
#!
#! [iword0, iword1, iword2, iword3, iword4, iword5, iword6, iword7,
#!  iword8, iword9, iword10, iword11, iword12, iword13, iword14, iword15, ... ]
#!
#! Final stack state :
#!
#! [oword0, oword1, oword2, oword3, oword4, oword5, oword6, oword7, ... ]
#!
#! See https://github.com/itzmeanjan/merklize-sha/blob/1d35aae9da7fed20127489f362b4bc93242a516c/include/keccak_256.hpp#L232-L257
export.hash.13
    # the 13 procedure locals hold the whole keccak-p[1600, 24] state, starting at local 0
    # ( assumes locals occupy consecutive ascending addresses, as the callees step through
    # the state with `add.1` — TODO confirm against the targeted Miden VM version )

    # prepare keccak256 state from input message
    locaddr.0
    exec.to_state_array

    # apply keccak-p[1600, 24] permutation
    locaddr.0
    exec.keccak_p

    # prepare keccak256 digest from state : state words 1 and 0 are loaded so that
    # word 0 ( lanes 0, 1 ) ends up on top of word 1 ( lanes 2, 3 )
    push.0.0.0.0
    loc_loadw.1
    push.0.0.0.0
    loc_loadw.0
    exec.to_digest
end