include "../../../../arch/x64/Vale.X64.InsBasic.vaf" include "../../../../arch/x64/Vale.X64.InsMem.vaf" include "../../../../arch/x64/Vale.X64.InsStack.vaf" include "Vale.Curve25519.X64.FastMul.vaf" include "Vale.Curve25519.X64.FastSqr.vaf" include "Vale.Curve25519.X64.FastHybrid.vaf" include{:fstar}{:open} "Vale.Curve25519.Fast_defs" include{:fstar}{:open} "Vale.Curve25519.Fast_lemmas_external" //include{:fstar}{:open} "Vale.Curve25519.FastHybrid_helpers" //include{:fstar}{:open} "Vale.Curve25519.FastUtil_helpers" include{:fstar}{:open} "Vale.X64.CPU_Features_s" module Vale.Curve25519.X64.FastWide #verbatim{:interface} open Vale.Def.Types_s open Vale.Arch.Types open Vale.Arch.HeapImpl open Vale.X64.Machine_s open Vale.X64.Memory open Vale.X64.Stack_i open Vale.X64.State open Vale.X64.Decls open Vale.X64.InsBasic open Vale.X64.InsMem open Vale.X64.InsStack open Vale.X64.QuickCode open Vale.X64.QuickCodes open Vale.Curve25519.Fast_defs open Vale.X64.CPU_Features_s #endverbatim #verbatim{:implementation} open Vale.Def.Types_s open Vale.Arch.Types open Vale.X64.Machine_s open Vale.X64.Memory open Vale.X64.Stack_i open Vale.X64.State open Vale.X64.Decls open Vale.X64.InsBasic open Vale.X64.InsMem open Vale.X64.InsStack open Vale.X64.QuickCode open Vale.X64.QuickCodes open FStar.Tactics open Vale.Curve25519.Fast_defs open Vale.Curve25519.Fast_lemmas_external //open Vale.Curve25519.FastHybrid_helpers //open Vale.Curve25519.FastUtil_helpers open Vale.X64.CPU_Features_s open Vale.Curve25519.X64.FastMul open Vale.Curve25519.X64.FastSqr open Vale.Curve25519.X64.FastHybrid #reset-options "--z3rlimit 60" #endverbatim procedure Fmul( ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64, ghost inB_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r15; inB_ptr @= rcx; tmp_in := rdi; inA_in := rsi; dst_in := r15; inB_in := rcx; reads r15; modifies rax; rbx; rcx; rdx; rdi; rsi; r8; r9; r10; r11; r13; r14; efl; heap0; memLayout; requires adx_enabled && bmi2_enabled; is_initial_heap(memLayout, mem); buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, inB_b) || dst_b == inB_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); buffers_disjoint(tmp_b, inB_b); validDstAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 4, memLayout, Secret); validSrcAddrs64(mem, inB_in, inB_b, 4, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 8, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let b0 := old(buffer64_read(inB_b, 0, mem)); let b1 := old(buffer64_read(inB_b, 1, mem)); let b2 := old(buffer64_read(inB_b, 2, mem)); let b3 := old(buffer64_read(inB_b, 3, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let a := pow2_four(a0, a1, a2, a3); let b := pow2_four(b0, b1, b2, b3); let d := pow2_four(d0, d1, d2, d3); d % prime == old(a * b) % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); { CreateHeaplets(list( declare_buffer64(inA_b, 0, Secret, Immutable), declare_buffer64(inB_b, 0, Secret, Immutable), declare_buffer64(dst_b, 0, Secret, Mutable), declare_buffer64(tmp_b, 0, Secret, Mutable))); LargeComment("Compute the raw multiplication: tmp <- src1 * src2"); Fast_multiply(0, tmp_b, inA_b, inB_b); Newline(); Comment("Line up pointers"); Mov64(inA_ptr, tmp_ptr); Mov64(rdi, r15); LargeComment("Wrap the result back into the field"); Carry_wide(0, dst_b, tmp_b); DestroyHeaplets(); } procedure Fmul_stdcall( inline win:bool, ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64, ghost inB_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r15; inB_ptr @= rcx; tmp_in := (if win then rcx else rdi); inA_in := (if win then rdx else rsi); dst_in := (if win then r8 else rdx); inB_in := (if win then r9 else rcx); modifies rax; rbx; rcx; rdx; rsi; rdi; rbp; rsp; r8; r9; r10; r11; r13; r14; r15; efl; heap0; memLayout; stack; stackTaint; requires rsp == init_rsp(stack); is_initial_heap(memLayout, mem); adx_enabled && bmi2_enabled; buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, inB_b) || dst_b == inB_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); buffers_disjoint(tmp_b, inB_b); validDstAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 4, memLayout, Secret); validSrcAddrs64(mem, inB_in, inB_b, 4, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 8, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let b0 := old(buffer64_read(inB_b, 0, mem)); let b1 := old(buffer64_read(inB_b, 1, mem)); let b2 := old(buffer64_read(inB_b, 2, mem)); let b3 := old(buffer64_read(inB_b, 3, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let a := pow2_four(a0, a1, a2, a3); let b := pow2_four(b0, b1, b2, b3); let d := pow2_four(d0, d1, d2, d3); d % prime == old(a * b) % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); win ==> rbx == old(rbx); win ==> rbp == old(rbp); win ==> rdi == old(rdi); win ==> rsi == old(rsi); win ==> rsp == old(rsp); win ==> r13 == old(r13); win ==> r14 == old(r14); win ==> r15 == old(r15); !win ==> rbx == old(rbx); !win ==> rbp == old(rbp); !win ==> r13 == old(r13); !win ==> r14 == old(r14); !win ==> r15 == old(r15); rsp == old(rsp); { Push_Secret(r13); Push_Secret(r14); Push_Secret(r15); Push_Secret(rbx); // Line up the rest of the arguments inline if (win) { // Store callee-save registers Push_Secret(rsi); Push_Secret(rdi); Mov64(tmp_ptr, rcx); Mov64(inA_ptr, rdx); Mov64(dst_ptr, r8); Mov64(inB_ptr, r9); } else { Mov64(dst_ptr, rdx); } Fmul(tmp_b, inA_b, dst_b, inB_b); inline if (win) { Pop_Secret(rdi); Pop_Secret(rsi); } Pop_Secret(rbx); Pop_Secret(r15); Pop_Secret(r14); Pop_Secret(r13); } procedure Fmul2( ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64, ghost inB_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r15; inB_ptr @= rcx; tmp_in := rdi; inA_in := rsi; dst_in := r15; inB_in := rcx; reads r15; modifies rax; rbx; rcx; rdx; rdi; rsi; r8; r9; r10; r11; r13; r14; efl; heap0; memLayout; requires adx_enabled && bmi2_enabled; is_initial_heap(memLayout, mem); buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, inB_b) || dst_b == inB_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); buffers_disjoint(tmp_b, inB_b); validDstAddrs64(mem, dst_in, dst_b, 8, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 8, memLayout, Secret); validSrcAddrs64(mem, inB_in, inB_b, 8, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 16, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let b0 := old(buffer64_read(inB_b, 0, mem)); let b1 := old(buffer64_read(inB_b, 1, mem)); let b2 := old(buffer64_read(inB_b, 2, mem)); let b3 := old(buffer64_read(inB_b, 3, mem)); let a := pow2_four(a0, a1, a2, a3); let b := pow2_four(b0, b1, b2, b3); let a0' := old(buffer64_read(inA_b, 0 + 4, mem)); let a1' := old(buffer64_read(inA_b, 1 + 4, mem)); let a2' := old(buffer64_read(inA_b, 2 + 4, mem)); let a3' := old(buffer64_read(inA_b, 3 + 4, mem)); let b0' := old(buffer64_read(inB_b, 0 + 4, mem)); let b1' := old(buffer64_read(inB_b, 1 + 4, mem)); let b2' := old(buffer64_read(inB_b, 2 + 4, mem)); let b3' := old(buffer64_read(inB_b, 3 + 4, mem)); let a' := pow2_four(a0', a1', a2', a3'); let b' := pow2_four(b0', b1', b2', b3'); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let d := pow2_four(d0, d1, d2, d3); let d0' := buffer64_read(dst_b, 0 + 4, mem); let d1' := buffer64_read(dst_b, 1 + 4, mem); let d2' := buffer64_read(dst_b, 2 + 4, mem); let d3' := buffer64_read(dst_b, 3 + 4, mem); let d' := pow2_four(d0', d1', d2', d3'); d % prime == old(a * b) % prime; d' % prime == old(a' * b') % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); { CreateHeaplets(list( declare_buffer64(inA_b, 0, Secret, Immutable), declare_buffer64(inB_b, 0, Secret, Immutable), declare_buffer64(dst_b, 0, Secret, Mutable), declare_buffer64(tmp_b, 0, Secret, Mutable))); LargeComment("Compute the raw multiplication tmp[0] <- f1[0] * f2[0]"); Fast_multiply(0, tmp_b, inA_b, inB_b); LargeComment("Compute the raw multiplication tmp[1] <- f1[1] * f2[1]"); Fast_multiply(4, tmp_b, inA_b, inB_b); Newline(); Comment("Line up pointers"); Mov64(inA_ptr, tmp_ptr); Mov64(rdi, r15); // Restore dst_ptr LargeComment("Wrap the results back into the field"); Carry_wide(0, dst_b, tmp_b); Newline(); Carry_wide(4, dst_b, tmp_b); DestroyHeaplets(); } procedure Fmul2_stdcall( inline win:bool, ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64, ghost inB_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r15; inB_ptr @= rcx; tmp_in := (if win then rcx else rdi); inA_in := (if win then rdx else rsi); dst_in := (if win then r8 else rdx); inB_in := (if win then r9 else rcx); modifies rax; rbx; rcx; rdx; rsi; rdi; rbp; rsp; r8; r9; r10; r11; r13; r14; r15; efl; heap0; memLayout; stack; stackTaint; requires rsp == init_rsp(stack); is_initial_heap(memLayout, mem); adx_enabled && bmi2_enabled; buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, inB_b) || dst_b == inB_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); buffers_disjoint(tmp_b, inB_b); validDstAddrs64(mem, dst_in, dst_b, 8, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 8, memLayout, Secret); validSrcAddrs64(mem, inB_in, inB_b, 8, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 16, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let b0 := old(buffer64_read(inB_b, 0, mem)); let b1 := old(buffer64_read(inB_b, 1, mem)); let b2 := old(buffer64_read(inB_b, 2, mem)); let b3 := old(buffer64_read(inB_b, 3, mem)); let a := pow2_four(a0, a1, a2, a3); let b := pow2_four(b0, b1, b2, b3); let a0' := old(buffer64_read(inA_b, 0 + 4, mem)); let a1' := old(buffer64_read(inA_b, 1 + 4, mem)); let a2' := old(buffer64_read(inA_b, 2 + 4, mem)); let a3' := old(buffer64_read(inA_b, 3 + 4, mem)); let b0' := old(buffer64_read(inB_b, 0 + 4, mem)); let b1' := old(buffer64_read(inB_b, 1 + 4, mem)); let b2' := old(buffer64_read(inB_b, 2 + 4, mem)); let b3' := old(buffer64_read(inB_b, 3 + 4, mem)); let a' := pow2_four(a0', a1', a2', a3'); let b' := pow2_four(b0', b1', b2', b3'); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let d := pow2_four(d0, d1, d2, d3); let d0' := buffer64_read(dst_b, 0 + 4, mem); let d1' := buffer64_read(dst_b, 1 + 4, mem); let d2' := buffer64_read(dst_b, 2 + 4, mem); let d3' := buffer64_read(dst_b, 3 + 4, mem); let d' := pow2_four(d0', d1', d2', d3'); d % prime == old(a * b) % prime; d' % prime == old(a' * b') % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); win ==> rbx == old(rbx); win ==> rbp == old(rbp); win ==> rdi == old(rdi); win ==> rsi == old(rsi); win ==> rsp == old(rsp); win ==> r13 == old(r13); win ==> r14 == old(r14); win ==> r15 == old(r15); !win ==> rbx == old(rbx); !win ==> rbp == old(rbp); !win ==> r13 == old(r13); !win ==> r14 == old(r14); !win ==> r15 == old(r15); rsp == old(rsp); { Push_Secret(r13); Push_Secret(r14); Push_Secret(r15); Push_Secret(rbx); // Line up the rest of the arguments inline if (win) { // Store callee-save registers Push_Secret(rsi); Push_Secret(rdi); Mov64(tmp_ptr, rcx); Mov64(inA_ptr, rdx); Mov64(dst_ptr, r8); Mov64(inB_ptr, r9); } else { Mov64(dst_ptr, rdx); } Fmul2(tmp_b, inA_b, dst_b, inB_b); inline if (win) { Pop_Secret(rdi); Pop_Secret(rsi); } Pop_Secret(rbx); Pop_Secret(r15); Pop_Secret(r14); Pop_Secret(r13); } procedure Fsqr(ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r12; tmp_in := rdi; inA_in := rsi; dst_in := r12; reads r12; modifies rax; rbx; rcx; rdx; rdi; rsi; r8; r9; r10; r11; r13; r14; r15; efl; heap0; memLayout; requires adx_enabled && bmi2_enabled; is_initial_heap(memLayout, mem); buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); validDstAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 4, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 8, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let a := pow2_four(a0, a1, a2, a3); let d := pow2_four(d0, d1, d2, d3); d % prime == old(a * a) % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// old(r12) == r12; modifies_buffer_2(dst_b, tmp_b, old(mem), mem); { CreateHeaplets(list( declare_buffer64(inA_b, 0, Secret, Immutable), declare_buffer64(dst_b, 0, Secret, Mutable), declare_buffer64(tmp_b, 0, Secret, Mutable))); LargeComment("Compute the raw multiplication: tmp <- f * f"); Fast_sqr(0, tmp_b, inA_b); Newline(); Comment("Line up pointers"); Mov64(inA_ptr, tmp_ptr); Mov64(rdi, r12); LargeComment("Wrap the result back into the field"); Carry_wide(0, dst_b, tmp_b); DestroyHeaplets(); } procedure Fsqr_stdcall( inline win:bool, ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r12; tmp_in := (if win then rcx else rdi); inA_in := (if win then rdx else rsi); dst_in := (if win then r8 else rdx); modifies rax; rbx; rcx; rdx; rsi; rdi; rbp; rsp; r8; r9; r10; r11; r12; r13; r14; r15; efl; heap0; memLayout; stack; stackTaint; requires rsp == init_rsp(stack); is_initial_heap(memLayout, mem); adx_enabled && bmi2_enabled; buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); validDstAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 4, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 8, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let a := pow2_four(a0, a1, a2, a3); let d := pow2_four(d0, d1, d2, d3); d % prime == old(a * a) % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); // validSrcAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); win ==> rbx == old(rbx); win ==> rbp == old(rbp); win ==> rdi == old(rdi); win ==> rsi == old(rsi); win ==> rsp == old(rsp); win ==> r12 == old(r12); win ==> r13 == old(r13); win ==> r14 == old(r14); win ==> r15 == old(r15); !win ==> rbx == old(rbx); !win ==> rbp == old(rbp); !win ==> r12 == old(r12); !win ==> r13 == old(r13); !win ==> r14 == old(r14); !win ==> r15 == old(r15); rsp == old(rsp); { Push_Secret(r15); Push_Secret(r13); Push_Secret(r14); Push_Secret(r12); Push_Secret(rbx); // Line up the rest of the arguments inline if (win) { // Store callee-save registers Push_Secret(rsi); Push_Secret(rdi); Mov64(tmp_ptr, rcx); Mov64(inA_ptr, rdx); Mov64(dst_ptr, r8); } else { Mov64(dst_ptr, rdx); } Fsqr(tmp_b, inA_b, dst_b); inline if (win) { Pop_Secret(rdi); Pop_Secret(rsi); } Pop_Secret(rbx); Pop_Secret(r12); Pop_Secret(r14); Pop_Secret(r13); Pop_Secret(r15); } procedure Fsqr2(ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r12; tmp_in := rdi; inA_in := rsi; dst_in := r12; reads r12; modifies rax; rbx; rcx; rdx; rdi; rsi; r8; r9; r10; r11; r13; r14; r15; efl; heap0; memLayout; requires adx_enabled && bmi2_enabled; is_initial_heap(memLayout, mem); buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); validDstAddrs64(mem, dst_in, dst_b, 8, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 8, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 16, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let a0' := old(buffer64_read(inA_b, 0 + 4, mem)); let a1' := old(buffer64_read(inA_b, 1 + 4, mem)); let a2' := old(buffer64_read(inA_b, 2 + 4, mem)); let a3' := old(buffer64_read(inA_b, 3 + 4, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let d0' := buffer64_read(dst_b, 0 + 4, mem); let d1' := buffer64_read(dst_b, 1 + 4, mem); let d2' := buffer64_read(dst_b, 2 + 4, mem); let d3' := buffer64_read(dst_b, 3 + 4, mem); let a := pow2_four(a0, a1, a2, a3); let a' := pow2_four(a0', a1', a2', a3'); let d := pow2_four(d0, d1, d2, d3); let d' := pow2_four(d0', d1', d2', d3'); d % prime == old(a * a) % prime; d' % prime == old(a' * a') % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// old(r12) == r12; modifies_buffer_2(dst_b, tmp_b, old(mem), mem); { CreateHeaplets(list( declare_buffer64(inA_b, 0, Secret, Immutable), declare_buffer64(dst_b, 0, Secret, Mutable), declare_buffer64(tmp_b, 0, Secret, Mutable))); Fast_sqr(0, tmp_b, inA_b); Newline(); Fast_sqr(4, tmp_b, inA_b); Newline(); Comment("Line up pointers"); Mov64(inA_ptr, tmp_ptr); Mov64(rdi, r12); Newline(); Carry_wide(0, dst_b, tmp_b); Newline(); Carry_wide(4, dst_b, tmp_b); DestroyHeaplets(); } procedure Fsqr2_stdcall( inline win:bool, ghost tmp_b:buffer64, ghost inA_b:buffer64, ghost dst_b:buffer64) {:public} {:quick} {:exportSpecs} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= r12; tmp_in := (if win then rcx else rdi); inA_in := (if win then rdx else rsi); dst_in := (if win then r8 else rdx); modifies rax; rbx; rcx; rdx; rsi; rdi; rbp; rsp; r8; r9; r10; r11; r12; r13; r14; r15; efl; heap0; memLayout; stack; stackTaint; requires rsp == init_rsp(stack); is_initial_heap(memLayout, mem); adx_enabled && bmi2_enabled; buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, tmp_b) || dst_b == tmp_b; buffers_disjoint(tmp_b, inA_b); validDstAddrs64(mem, dst_in, dst_b, 8, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 8, memLayout, Secret); validDstAddrs64(mem, tmp_in, tmp_b, 16, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let a0' := old(buffer64_read(inA_b, 0 + 4, mem)); let a1' := old(buffer64_read(inA_b, 1 + 4, mem)); let a2' := old(buffer64_read(inA_b, 2 + 4, mem)); let a3' := old(buffer64_read(inA_b, 3 + 4, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let d0' := buffer64_read(dst_b, 0 + 4, mem); let d1' := buffer64_read(dst_b, 1 + 4, mem); let d2' := buffer64_read(dst_b, 2 + 4, mem); let d3' := buffer64_read(dst_b, 3 + 4, mem); let a := pow2_four(a0, a1, a2, a3); let a' := pow2_four(a0', a1', a2', a3'); let d := pow2_four(d0, d1, d2, d3); let d' := pow2_four(d0', d1', d2', d3'); d % prime == old(a * a) % prime; d' % prime == old(a' * a') % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); // validSrcAddrs64(mem, dst_in, dst_b, 8, memLayout, Secret); win ==> rbx == old(rbx); win ==> rbp == old(rbp); win ==> rdi == old(rdi); win ==> rsi == old(rsi); win ==> rsp == old(rsp); win ==> r12 == old(r12); win ==> r13 == old(r13); win ==> r14 == old(r14); win ==> r15 == old(r15); !win ==> rbx == old(rbx); !win ==> rbp == old(rbp); !win ==> r12 == old(r12); !win ==> r13 == old(r13); !win ==> r14 == old(r14); !win ==> r15 == old(r15); rsp == old(rsp); { Push_Secret(r15); Push_Secret(r13); Push_Secret(r14); Push_Secret(r12); Push_Secret(rbx); // Line up the rest of the arguments inline if (win) { // Store callee-save registers Push_Secret(rsi); Push_Secret(rdi); Mov64(tmp_ptr, rcx); Mov64(inA_ptr, rdx); Mov64(dst_ptr, r8); } else { Mov64(dst_ptr, rdx); } Fsqr2(tmp_b, inA_b, dst_b); inline if (win) { Pop_Secret(rdi); Pop_Secret(rsi); } Pop_Secret(rbx); Pop_Secret(r12); Pop_Secret(r14); Pop_Secret(r13); Pop_Secret(r15); } /* procedure Fsqr_loop_stdcall( inline win:bool, ghost dst_b:buffer64, ghost inA_b:buffer64, ghost tmp_b:buffer64) {:public} {:quick} lets tmp_ptr @= rdi; inA_ptr @= rsi; dst_ptr @= rdx; count @= rbx; saved_dst_ptr @= rbp; tmp_in := (if win then rcx else rdi); inA_in := (if win then rdx else rsi); dst_in := (if win then r8 else rdx); count_in := (if win then r9 else rcx); reads memLayout; modifies rax; rbx; rcx; rdx; rdi; rsi; r8; r9; r10; r11; r13; r14; r15; rbp; rsp; efl; heap0; stack; requires rsp == init_rsp(stack); adx_enabled && bmi2_enabled; count_in >= 1; buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, tmp_b); buffers_disjoint(tmp_b, inA_b); validDstAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); validSrcAddrs64(mem, inA_in, inA_b, 4, memLayout, Secret); validSrcAddrs64(mem, tmp_in, tmp_b, 8, memLayout, Secret); ensures let a0 := old(buffer64_read(inA_b, 0, mem)); let a1 := old(buffer64_read(inA_b, 1, mem)); let a2 := old(buffer64_read(inA_b, 2, mem)); let a3 := old(buffer64_read(inA_b, 3, mem)); let d0 := buffer64_read(dst_b, 0, mem); let d1 := buffer64_read(dst_b, 1, mem); let d2 := buffer64_read(dst_b, 2, mem); let d3 := buffer64_read(dst_b, 3, mem); let a := pow2_four(a0, a1, a2, a3); let d := pow2_four(d0, d1, d2, d3); //d % prime == old(a + b) % prime; ////////////////////////////////////// // Framing ////////////////////////////////////// modifies_buffer_2(dst_b, tmp_b, old(mem), mem); validSrcAddrs64(mem, dst_in, dst_b, 4, memLayout, Secret); win ==> rsi == old(rsi); r13 == old(r13); r14 == old(r14); r15 == old(r15); rsp == old(rsp); rbp == old(rbp); { Push(rbp); Push(rbx); Push(r15); Push(r13); Push(r14); // Line up the rest of the arguments inline if (win) { // Store callee-save registers Push(rsi); Mov64(tmp_ptr, rcx); Mov64(inA_ptr, rdx); Mov64(saved_dst_ptr, r8); // Save dst_ptr, since Fast_multiply will clobber it Mov64(count, r9); } else { Mov64(saved_dst_ptr, rdx); // Save dst_ptr, since Fast_multiply will clobber it Mov64(count, rcx); } // rdi = tmp_ptr // rsi = inA_ptr Fast_sqr(0, tmp_b, inA_b); // Align arguments to Carry_wide, which expects rdi = saved_dst_ptr, and rsi = tmp buffer Mov64(inA_ptr, tmp_ptr); Mov64(rdi, saved_dst_ptr); Carry_wide(0, dst_b, tmp_b); // Prepare for loop, which repeatedly squares dst // Need: rdi = tmp_ptr, and rsi = dst_ptr Mov64(rdi, rsi); // Restore tmp_ptr (in preparaton for another iteration of Fast_sqr) Mov64(rsi, saved_dst_ptr); Sub64(count, 1); let pre_loop_mem := heap0; while (count > 0) invariant 0 <= count < old(count_in); adx_enabled && bmi2_enabled; //offset == 0 || offset == 4; buffers_disjoint(dst_b, inA_b) || dst_b == inA_b; buffers_disjoint(dst_b, tmp_b); buffers_disjoint(tmp_b, inA_b); buffers_disjoint(tmp_b, inA_b); saved_dst_ptr == rsi; validDstAddrs64(mem, rsi, dst_b, 4 + 0, memLayout, Secret); validSrcAddrs64(mem, tmp_ptr, tmp_b, 8 + 0*2, memLayout, Secret); modifies_buffer_2(dst_b, tmp_b, pre_loop_mem, mem); decreases count; { Fast_sqr(0, tmp_b, dst_b); // Align arguments to Carry_wide Mov64(inA_ptr, tmp_ptr); Mov64(rdi, saved_dst_ptr); Carry_wide(0, dst_b, tmp_b); Mov64(tmp_ptr, inA_ptr); // Restore tmp_ptr (in preparaton for another iteration of Fast_sqr) Mov64(inA_ptr, saved_dst_ptr); // Restore dst Sub64(count, 1); } inline if (win) { Pop(rsi); } Pop(r14); Pop(r13); Pop(r15); Pop(rbx); Pop(rbp); } */