include "../../../arch/x64/Vale.X64.InsBasic.vaf" include "../../../arch/x64/Vale.X64.InsMem.vaf" include "../../../arch/x64/Vale.X64.InsVector.vaf" include "../../../arch/x64/Vale.X64.InsAes.vaf" include "../../../crypto/aes/x64/Vale.AES.X64.PolyOps.vaf" include{:fstar}{:open} "Vale.Def.Prop_s" include{:fstar}{:open} "Vale.Def.Opaque_s" include{:fstar}{:open} "Vale.Def.Words_s" include{:fstar}{:open} "Vale.Def.Types_s" include{:fstar}{:open} "FStar.Seq.Base" include{:fstar}{:open} "Vale.AES.AES_s" include{:fstar}{:open} "Vale.X64.Machine_s" include{:fstar}{:open} "Vale.X64.Memory" include{:fstar}{:open} "Vale.X64.State" include{:fstar}{:open} "Vale.X64.Decls" include{:fstar}{:open} "Vale.X64.QuickCode" include{:fstar}{:open} "Vale.X64.QuickCodes" include{:fstar}{:open} "Vale.Arch.Types" include{:fstar}{:open} "Vale.AES.AES_helpers" //include{:fstar}{:open} "Vale.Poly1305.Math" include{:fstar}{:open} "Vale.AES.GCM_helpers" include{:fstar}{:open} "Vale.AES.GCTR_s" include{:fstar}{:open} "Vale.AES.GCTR" include{:fstar}{:open} "Vale.Arch.TypesNative" include{:fstar}{:open} "Vale.X64.CPU_Features_s" include{:fstar}{:open} "Vale.Math.Poly2_s" include{:fstar}{:open} "Vale.AES.GF128_s" include{:fstar}{:open} "Vale.AES.GF128" include{:fstar}{:open} "Vale.AES.GHash" include "Vale.AES.X64.AESopt2.vaf" include "Vale.AES.X64.AESGCM_expected_code.vaf" module Vale.AES.X64.AESopt #verbatim{:interface}{:implementation} open FStar.Mul open Vale.Def.Prop_s open Vale.Def.Opaque_s open Vale.Def.Words_s open Vale.Def.Types_s open FStar.Seq open Vale.Arch.Types open Vale.Arch.HeapImpl open Vale.AES.AES_s open Vale.X64.Machine_s open Vale.X64.Memory open Vale.X64.State open Vale.X64.Decls open Vale.X64.InsBasic open Vale.X64.InsMem open Vale.X64.InsVector open Vale.X64.InsAes open Vale.X64.QuickCode open Vale.X64.QuickCodes open Vale.AES.AES_helpers //open Vale.Poly1305.Math // For lemma_poly_bits64() open Vale.AES.GCM_helpers open Vale.AES.GCTR_s open Vale.AES.GCTR open Vale.Arch.TypesNative open Vale.X64.CPU_Features_s open Vale.Math.Poly2_s open Vale.AES.GF128_s open Vale.AES.GF128 open Vale.AES.GHash open Vale.AES.X64.PolyOps open Vale.AES.X64.AESopt2 open Vale.AES.X64.AESGCM_expected_code open Vale.Transformers.Transform #endverbatim #verbatim{:interface} open FStar.Mul let aes_reqs0 (alg:algorithm) (key:seq nat32) (round_keys:seq quad32) (keys_b:buffer128) (key_ptr:int) (heap0:vale_heap) (layout:vale_heap_layout) : prop0 = aesni_enabled /\ pclmulqdq_enabled /\ avx_enabled /\ alg = AES_128 /\ //(alg = AES_128 || alg = AES_256) /\ is_aes_key_LE alg key /\ length(round_keys) == nr(alg) + 1 /\ round_keys == key_to_round_keys_LE alg key /\ validSrcAddrsOffset128 heap0 key_ptr keys_b 8 (nr alg + 1 - 8) layout Secret /\ buffer128_as_seq heap0 keys_b == round_keys let aes_reqs_offset (alg:algorithm) (key:seq nat32) (round_keys:seq quad32) (keys_b:buffer128) (key_ptr:int) (heap0:vale_heap) (layout:vale_heap_layout) : prop0 = aesni_enabled /\ avx_enabled /\ pclmulqdq_enabled /\ (alg = AES_128 || alg = AES_256) /\ is_aes_key_LE alg key /\ length(round_keys) == nr(alg) + 1 /\ round_keys == key_to_round_keys_LE alg key /\ validSrcAddrsOffset128 heap0 key_ptr keys_b 8 (nr alg + 1 - 8) layout Secret /\ s128 heap0 keys_b == round_keys let six_of (a:Type0) = a & a & a & a & a & a let quad32_6 = six_of quad32 unfold let make_six_of (#a:Type0) (f:(n:nat{n < 6}) -> GTot a) : GTot (six_of a) = (f 0, f 1, f 2, f 3, f 4, f 5) unfold let map_six_of (#a #b:Type0) (x:six_of a) (f:a -> GTot b) : GTot (six_of b) = let (x0, x1, x2, 
let six_of (a:Type0) = a & a & a & a & a & a

let quad32_6 = six_of quad32

unfold let make_six_of (#a:Type0) (f:(n:nat{n < 6}) -> GTot a) : GTot (six_of a) =
  (f 0, f 1, f 2, f 3, f 4, f 5)

unfold let map_six_of (#a #b:Type0) (x:six_of a) (f:a -> GTot b) : GTot (six_of b) =
  let (x0, x1, x2, x3, x4, x5) = x in
  (f x0, f x1, f x2, f x3, f x4, f x5)

unfold let map2_six_of (#a #b #c:Type0) (x:six_of a) (y:six_of b) (f:a -> b -> GTot c) : GTot (six_of c) =
  let (x0, x1, x2, x3, x4, x5) = x in
  let (y0, y1, y2, y3, y4, y5) = y in
  (f x0 y0, f x1 y1, f x2 y2, f x3 y3, f x4 y4, f x5 y5)

let rounds_opaque_6 (init:quad32_6) (round_keys:seq quad32) (rnd:nat{rnd < length round_keys}) : GTot quad32_6 =
  map_six_of init (fun x -> eval_rounds x round_keys rnd)

let xor_reverse_inc32lite_6 (n i0:int) (ctr_BE rndkey:quad32) : GTot quad32_6 =
  make_six_of (fun i ->
    let r = reverse_bytes_quad32 (inc32lite ctr_BE (i0 + i)) in
    if i < n then quad32_xor r rndkey else r)

//let scratch_reqs (scratch_b:buffer128) (count:nat) (heap3:vale_heap) (s:seq quad32) (z3:quad32) : prop0 =
//  count * 6 + 6 <= length s /\ (
//  let data = slice s (count * 6) (count * 6 + 6) in
//  z3 == reverse_bytes_quad32 (index data 5) /\
//  scratch_b_blocks true true scratch_b 8 5 heap3 data)
let scratch_reqs (scratch_b:buffer128) (count:nat) (heap3:vale_heap) (s:seq quad32) (z3:quad32) : prop0 =
  count * 6 + 6 <= length s /\ (
  let data = slice s (count * 6) (count * 6 + 6) in
  z3 == reverse_bytes_quad32 (index data 5) /\
  buffer128_read scratch_b 3 heap3 == reverse_bytes_quad32 (index data 4) /\
  buffer128_read scratch_b 4 heap3 == reverse_bytes_quad32 (index data 3) /\
  buffer128_read scratch_b 5 heap3 == reverse_bytes_quad32 (index data 2) /\
  buffer128_read scratch_b 6 heap3 == reverse_bytes_quad32 (index data 1) /\
  buffer128_read scratch_b 7 heap3 == reverse_bytes_quad32 (index data 0))
#endverbatim

#verbatim
open FStar.Mul
let add = Vale.Math.Poly2_s.add
#reset-options "--z3rlimit 30"
#endverbatim

type six_of(a:Type(0)):Type(0) := tuple(a, a, a, a, a, a);
type quad32_6:Type(0) := six_of(quad32);

function make_six_of#[#a:Type(0)](f:fun(int_range(0, 5)) -> a):six_of(a) extern;
function map_six_of#[#a:Type(0), #b:Type(0)](x:six_of(a), f:fun(a) -> b):six_of(b) extern;
function map2_six_of#[#a:Type(0), #b:Type(0), #c:Type(0)](
    x:six_of(a), y:six_of(b), f:fun(a, b) -> c):six_of(c) extern;
function rounds_opaque_6(init:quad32_6, round_keys:seq(quad32), rnd:nat):quad32_6 extern;
function xor_reverse_inc32lite_6(n:int, i0:int, ctr_BE:quad32, rndkey:quad32):quad32_6 extern;
function aes_reqs0(alg:algorithm, key:seq(nat32), round_keys:seq(quad32), keys_b:buffer128,
    key_ptr:int, heap0:vale_heap, layout:vale_heap_layout):prop extern;
function aes_reqs_offset(alg:algorithm, key:seq(nat32), round_keys:seq(quad32), keys_b:buffer128,
    key_ptr:int, heap0:vale_heap, layout:vale_heap_layout):prop extern;
function scratch_reqs(scratch_b:buffer128, count:nat, heap3:vale_heap, s:seq(quad32), z3:quad32):prop extern;

#token +. precedence +
#token *. precedence *
#token %. precedence *
#token ~~ precedence !
function operator(+.) (a:poly, b:poly):poly := add;
function operator(*.) (a:poly, b:poly):poly := mul;
function operator(%.) (a:poly, b:poly):poly := mod;
function operator(~~) (a:quad32):poly := of_quad32;
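// Illustrative reading of the sugar above (a sketch only): for x:quad32 and h, m:poly,
//     (~~x +. h) *. h %. m
// parses as mod(mul(add(of_quad32(x), h), h), m), i.e. lift to a polynomial, add,
// carryless-multiply, then reduce; the #token lines give +. the precedence of + and
// give *. and %. the precedence of *.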
ghost procedure finish_aes_encrypt_le(ghost alg:algorithm, ghost input_LE:quad32, ghost key:seq(nat32))
    requires
        is_aes_key_LE(alg, key);
    ensures
        aes_encrypt_LE(alg, key, input_LE) == eval_cipher(alg, input_LE, key_to_round_keys_LE(alg, key));
{
    aes_encrypt_LE_reveal();
    eval_cipher_reveal();
}

procedure Load_two_lsb(inout dst:xmm)
    {:quick exportOnly}
    {:public}
    lets constp @= r11;
    modifies constp; efl;
    requires sse_enabled;
    ensures dst == Mkfour(2, 0, 0, 0);
{
    ZeroXmm(dst);
    lemma_insert_nat64_nat32s(dst, 2, 0);
    assert two_to_nat32(Mktwo(2, 0)) == 0x2;    // OBSERVE
    PinsrqImm(dst, 2, 0, constp);
}

procedure Load_0xc2_msb(out dst:xmm)
    {:quick exportOnly}
    lets constp @= r11;
    modifies constp; efl;
    requires sse_enabled;
    ensures dst == Mkfour(0, 0, 0, 0xc2000000);
{
    ZeroXmm(dst);
    assert two_to_nat32(Mktwo(0, 0xc2000000)) == 0xc200000000000000;    // OBSERVE
    lemma_insert_nat64_nat32s(dst, 0, 0xc2000000);
    PinsrqImm(dst, 0xc200000000000000, 1, constp);
}

// Version with concrete values succeeds.
//procedure Load_one_lsb()
//    {:quick}
//    lets constp @= r11; dst @= xmm1;
//    modifies constp; dst; efl;
//    ensures dst == Mkfour(1, 0, 0, 0);
//{
//    ZeroXmm(dst);
//    assert two_to_nat32(Mktwo(1, 0)) == 0x1;    // OBSERVE
//    PinsrqImm(dst, 1, 0, constp);
//    insert_nat64_reveal();
//}

procedure Load_one_lsb(inout dst:xmm)
    {:quick exportOnly}
    {:public}
    lets constp @= r11;
    modifies constp; efl;
    requires sse_enabled;
    ensures dst == Mkfour(1, 0, 0, 0);
{
    ZeroXmm(dst);
    lemma_insert_nat64_nat32s(dst, 1, 0);
    assert two_to_nat32(Mktwo(1, 0)) == 0x1;    // OBSERVE
    PinsrqImm(dst, 1, 0, constp);
}
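// Sketch of the constant-load pattern used above (illustrative only): PinsrqImm writes
// a 64-bit immediate into one half of an xmm register, and two_to_nat32 relates that
// immediate to the pair of 32-bit lanes it fills; e.g. in Load_two_lsb,
//     two_to_nat32(Mktwo(2, 0)) == 2 + 0x100000000 * 0 == 0x2,
// so inserting 0x2 into the low half of a zeroed register yields Mkfour(2, 0, 0, 0).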
procedure Handle_ctr32(ghost ctr_BE:quad32)
    {:quick}
    lets
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Z1 @= xmm5; Z2 @= xmm6;
        inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15; constp @= r11;
    reads
        rndkey;
    modifies
        constp; Ii; T1; T2; Z1; Z2; inout1; inout2; inout3; inout4; inout5; efl;
    requires
        avx_enabled && sse_enabled;
        T1 == reverse_bytes_quad32(ctr_BE);
    ensures
        tuple(inout1, inout2, inout3, inout4, inout5, T1) == xor_reverse_inc32lite_6(2, 1, ctr_BE, rndkey);
{
    InitPshufbMask(Ii, constp);     // # borrow $Ii for .Lbswap_mask
    VPshufb(Z2, T1, Ii);            // # byte-swap counter

    // OpenSSL uses a memory operand with VPaddd to do the addition with .Lone_lsb. We avoid that here for now.
    //Load_two_lsb();   // # borrow $Z1, .Ltwo_lsb
    //Load_one_lsb();   // # .Lone_lsb
    Load_one_lsb(Z1);
    VPaddd(inout1, Z2, Z1);
    Load_two_lsb(Z1);
    VPaddd(inout2, Z2, Z1);
    VPaddd(inout3, inout1, Z1);
    VPshufb(inout1, inout1, Ii);
    VPaddd(inout4, inout2, Z1);
    VPshufb(inout2, inout2, Ii);
    VPxor(inout1, inout1, rndkey);
    VPaddd(inout5, inout3, Z1);
    VPshufb(inout3, inout3, Ii);
    VPxor(inout2, inout2, rndkey);
    VPaddd(T1, inout4, Z1);         // # byte-swapped next counter value
    VPshufb(inout4, inout4, Ii);
    VPshufb(inout5, inout5, Ii);
    VPshufb(T1, T1, Ii);            // # next counter value
}
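// What Handle_ctr32 computes, as a sketch: writing r(i) for
// reverse_bytes_quad32(inc32lite(ctr_BE, i)), the postcondition unfolds to
//     (inout1, ..., inout5, T1) == (r(1) ^ rndkey, r(2) ^ rndkey, r(3), r(4), r(5), r(6)),
// i.e. xor_reverse_inc32lite_6(2, 1, ...): only the first two of the six counter
// blocks have absorbed round key 0 on this path.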
procedure Loop6x_ctr_update(
        inline alg:algorithm,
        ghost h_LE:quad32,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost hkeys_b:buffer128,
        ghost ctr_BE:quad32)
    {:quick}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        key @= rcx; Xip @= r9;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z1 @= xmm5; Z2 @= xmm6;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        counter @= rbx; constp @= r11;
        h := of_quad32(reverse_bytes_quad32(h_LE));
    reads
        key; rndkey; Xip; heap0; memLayout;
    modifies
        counter; constp; Hkey; Ii; T1; T2; Z1; Z2;
        inout0; inout1; inout2; inout3; inout4; inout5; efl;
    requires
        sse_enabled;

        // AES reqs
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        //aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        //rndkey == index(round_keys, 0);

        // Counter requirements
        T2 == Mkfour(0, 0, 0, 0x1000000);
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 0));
        //0 <= counter < 256;   // Implied by next line
        counter == ctr_BE.lo0 % 256;
        //inout5.hi3 + 0x1000000 < pow2_32;
        inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 0)), rndkey);
        counter + 6 < 256 ==>
            tuple(inout0, inout1, inout2, inout3, inout4, inout5) == xor_reverse_inc32lite_6(1, 0, ctr_BE, rndkey);

        hkeys_b_powers(hkeys_b, heap0, memLayout, Xip - 0x20, h);
    ensures
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 6));
        0 <= counter < 256;
        counter == inc32lite(ctr_BE, 6).lo0 % 256;
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) == xor_reverse_inc32lite_6(3, 0, ctr_BE, rndkey);
        Hkey == buffer128_read(hkeys_b, 0, heap0);
{
    // OpenSSL does this with "add `6<<24`,counter", followed by jc,
    // which handles wrap and control flow more efficiently
    Add64(counter, 6);
    if (counter >= 256) {
        Load128_buffer(heap0, Hkey, Xip, 0x00-0x20, Secret, hkeys_b, 0);    // # $Hkey^1
        Handle_ctr32(ctr_BE);   //, counter);
        Sub64(counter, 256);
    } else {
        Load128_buffer(heap0, Hkey, Xip, 0x00-0x20, Secret, hkeys_b, 0);    // # $Hkey^1
        VPaddd(T1, T2, inout5);     // OpenSSL uses VPaddb
        VPxor(inout1, inout1, rndkey);
        VPxor(inout2, inout2, rndkey);
        lemma_msb_in_bounds(ctr_BE, inout5, T1, old(counter));
    }
}
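// Wrap example (illustrative): the in-register counters are byte-reversed, so the cheap
// VPaddd-with-msb trick only increments the original low byte; counter tracks
// ctr_BE.lo0 % 256 to detect overflow of that byte. E.g. if counter == 253, Add64 makes
// it 259 >= 256, so the six counter blocks straddle an 8-bit carry and Handle_ctr32
// recomputes them with full 32-bit adds before Sub64 renormalizes counter.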
procedure Loop6x_plain(
        inline alg:algorithm,
        // OpenSSL includes the number of rounds (nr) as a dynamic parameter (stored with the
        // key). That saves code space but adds extra instructions to the fast path; maybe the
        // branch predictor is good enough for it not to matter.
        inline rnd:nat,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost init:quad32_6,
        out rndkey:xmm)
    {:quick exportOnly}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        key @= rcx;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
    reads
        key; heap0; memLayout;
    modifies
        inout0; inout1; inout2; inout3; inout4; inout5; efl;
    requires
        sse_enabled;
        @rndkey == 1 || @rndkey == 2 || @rndkey == 15;
        // AES reqs
        // aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        rnd + 1 < length(round_keys);
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) == rounds_opaque_6(init, round_keys, rnd);
    ensures
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) == rounds_opaque_6(init, round_keys, rnd + 1);
{
    Load128_buffer(heap0, rndkey, key, 16*(rnd+1)-0x80, Secret, keys_b, rnd+1);
    VAESNI_enc(inout0, inout0, rndkey);
    VAESNI_enc(inout1, inout1, rndkey);
    VAESNI_enc(inout2, inout2, rndkey);
    VAESNI_enc(inout3, inout3, rndkey);
    VAESNI_enc(inout4, inout4, rndkey);
    VAESNI_enc(inout5, inout5, rndkey);
    eval_rounds_reveal();
    commute_sub_bytes_shift_rows_forall();
}
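// Round-invariant sketch (illustrative): each call advances all six lanes by one AES
// round, so chaining Loop6x_plain(alg, 0, ...) through Loop6x_plain(alg, nr(alg)-2, ...)
// maintains
//     (inout0, ..., inout5) == rounds_opaque_6(init, round_keys, rnd)
// until rnd == nr(alg) - 1, at which point only the VAESNI_enc_last step remains.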
procedure Loop6x_preamble(
        inline alg:algorithm,
        // OpenSSL includes the number of rounds (nr) as a dynamic parameter (stored with the
        // key). That saves code space but adds extra instructions to the fast path; maybe the
        // branch predictor is good enough for it not to matter.
        ghost h_LE:quad32,
        ghost iv_b:buffer128,
        ghost scratch_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost hkeys_b:buffer128,
        ghost ctr_BE:quad32)
    {:quick}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        key @= rcx; ivp @= r8; Xip @= r9;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z1 @= xmm5; Z2 @= xmm6;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        counter @= rbx; constp @= r11;
        h := of_quad32(reverse_bytes_quad32(h_LE));
    reads
        key; ivp; Xip; rbp; rndkey; memLayout; heap0;
    modifies
        counter; constp; Hkey; Ii; T1; T2; Z1; Z2;
        inout0; inout1; inout2; inout3; inout4; inout5;
        heap3; efl;
    requires
        sse_enabled;

        // Valid ptrs and buffers
        validDstAddrs128(heap3, rbp, scratch_b, 9, memLayout, Secret);

        // AES reqs
        // aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        rndkey == index(round_keys, 0);

        // Counter requirements
        T2 == Mkfour(0, 0, 0, 0x1000000);
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 0));
        counter == ctr_BE.lo0 % 256;
        inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 0)), rndkey);
        counter + 6 < 256 ==>
            tuple(inout0, inout1, inout2, inout3, inout4, inout5) == xor_reverse_inc32lite_6(1, 0, ctr_BE, rndkey);

        hkeys_b_powers(hkeys_b, heap0, memLayout, Xip - 0x20, h);
    ensures
        // Framing
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, 8, 8);

        // Semantics
        // IO
        let init := make_six_of(fun (n:int_range(0, 5)) quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, n)), rndkey));
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) == rounds_opaque_6(init, round_keys, 0);

        // Next counter
        buffer128_read(scratch_b, 8, heap3) == reverse_bytes_quad32(inc32lite(ctr_BE, 6));

        // Counter details
        0 <= counter < 256;
        counter == inc32lite(ctr_BE, 6).lo0 % 256;
        Hkey == buffer128_read(hkeys_b, 0, old(heap0));
{
    Loop6x_ctr_update(alg, h_LE, key_words, round_keys, keys_b, hkeys_b, ctr_BE);
    init_rounds_opaque(inout0, round_keys);
    init_rounds_opaque(inout1, round_keys);
    init_rounds_opaque(inout2, round_keys);
/*
    init_rounds_opaque(inout0, round_keys);
    if (key > 72) {     // TODO: FIX!
        Handle_ctr32(ctr_BE);
    } else {
        VPaddd(T1, T2, inout5);     // OpenSSL uses VPaddb
        VPxor(inout1, inout1, rndkey);
        VPxor(inout2, inout2, rndkey);
        lemma_incr_msb(inc32lite(ctr_BE, 5), old(inout5), T1, 1);
    }
    init_rounds_opaque(inout1, round_keys);
    init_rounds_opaque(inout2, round_keys);
*/
    Store128_buffer(heap3, rbp, T1, 0x80, Secret, scratch_b, 8);    // # save next counter value
    /* XXX[jb]: Changed from Store128_buffer(heap2, ivp, T1, 0, Secret, iv_b, 0); */
    VPxor(inout3, inout3, rndkey);
    init_rounds_opaque(inout3, round_keys);
    VPxor(inout4, inout4, rndkey);
    init_rounds_opaque(inout4, round_keys);
    VPxor(inout5, inout5, rndkey);
    init_rounds_opaque(inout5, round_keys);
}
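// Note (sketch): AES round 0 is just AddRoundKey, so the VPxor calls above (and the
// Handle_ctr32 path) leave each lane holding exactly init = r(n) ^ round_keys[0], and
// rounds_opaque_6(init, round_keys, 0) is that same tuple; init_rounds_opaque merely
// records x == eval_rounds(x, round_keys, 0) for each lane.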
procedure Loop6x_reverse128(
        inline in0_offset:nat,
        inline stack_offset:nat,
        ghost start:nat,
        ghost in0_b:buffer128,
        ghost scratch_b:buffer128)
    {:quick}
    lets
        in0 @= r14;
    reads
        rbp; in0; memLayout; heap6;
    modifies
        r12; r13; heap3;
    requires
        sse_enabled && movbe_enabled;
        validSrcAddrsOffset128(heap6, in0, in0_b, start, in0_offset + 1, memLayout, Secret);
        validDstAddrs128(heap3, rbp, scratch_b, stack_offset + 1, memLayout, Secret);
    ensures
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, stack_offset, stack_offset);
        buffer128_read(scratch_b, stack_offset, heap3) ==
            old(reverse_bytes_quad32(buffer128_read(in0_b, start + in0_offset, heap6)));
{
    LoadBe64_buffer128(heap6, r13, in0, in0_offset * 16 + 8, Secret, true, in0_b, start + in0_offset);
    LoadBe64_buffer128(heap6, r12, in0, in0_offset * 16, Secret, false, in0_b, start + in0_offset);
    Store64_buffer128(heap3, rbp, r13, stack_offset * 16, Secret, false, scratch_b, stack_offset);
    Store64_buffer128(heap3, rbp, r12, stack_offset * 16 + 8, Secret, true, scratch_b, stack_offset);
    lemma_reverse_bytes_quad32_64(
        old(buffer128_read(in0_b, start + in0_offset, heap6)),
        old(buffer128_read(scratch_b, stack_offset, heap3)),
        buffer128_read(scratch_b, stack_offset, heap3));
}

procedure Loop6x_round9(
        inline alg:algorithm,
        // OpenSSL includes the number of rounds (nr) as a dynamic parameter (stored with the
        // key). That saves code space but adds extra instructions to the fast path; maybe the
        // branch predictor is good enough for it not to matter.
        ghost count:nat,
        ghost in_b:buffer128,
        ghost scratch_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128)
    {:quick}
    lets
        inp @= rdi; key @= rcx;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7;
    reads
        key; inp; rbp; memLayout; heap0; heap6;
    modifies
        Ii; T1; T2; Hkey; Z1; Z2; Z3; efl; heap3;
    requires
        sse_enabled;
        validSrcAddrsOffset128(heap6, inp, in_b, count*6, 6, memLayout, Secret);
        validDstAddrs128(heap3, rbp, scratch_b, 8, memLayout, Secret);
        // aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        T1 == buffer128_read(keys_b, #nat(nr(alg)), heap0);
    ensures
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, 1, 1);
        tuple(T2, Ii, Z1, Z2, Z3, Hkey) == make_six_of(fun(i:int_range(0, 5))
            quad32_xor(index(round_keys, #nat(nr(alg))), buffer128_read(in_b, count * 6 + i, heap6)));
        buffer128_read(scratch_b, 1, heap3) == old(Z3);
{
    /* XXX[jb]: The Load128_buffer(heap0, T1, key, 0xa0-0x80, Secret, keys_b, 10); has been
       removed from here to allow for even cleaner code to verify :) */
    Store128_buffer(heap3, rbp, Z3, 0x10, Secret, scratch_b, 1);    // # postpone vpxor $Z3,$Xi,$Xi
    VPxor(T2, T1, Mem128(heap6, inp, 0x00, Secret, in_b, count*6 + 0));
    VPxor(Ii, T1, Mem128(heap6, inp, 0x10, Secret, in_b, count*6 + 1));
    VPxor(Z1, T1, Mem128(heap6, inp, 0x20, Secret, in_b, count*6 + 2));
    VPxor(Z2, T1, Mem128(heap6, inp, 0x30, Secret, in_b, count*6 + 3));
    VPxor(Z3, T1, Mem128(heap6, inp, 0x40, Secret, in_b, count*6 + 4));
    VPxor(Hkey, T1, Mem128(heap6, inp, 0x50, Secret, in_b, count*6 + 5));
}

procedure load_one_msb()
    {:quick}
    lets constp @= r11; T2 @= xmm2;
    modifies constp; T2; efl;
    requires sse_enabled;
    ensures T2 == Mkfour(0, 0, 0, 0x1000000);
{
    ZeroXmm(T2);
    assert two_to_nat32(Mktwo(0, 0x1000000)) == 0x100000000000000;  // OBSERVE
    PinsrqImm(T2, 0x100000000000000, 1, constp);
    insert_nat64_reveal();
}
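// Byte-reversal sketch (illustrative): reverse_bytes_quad32 of a 128-bit block equals
// the two byte-reversed 64-bit halves stored crosswise, which is what Loop6x_reverse128
// does with movbe: load the high half reversed into r13 and the low half reversed into
// r12, store them swapped, and let lemma_reverse_bytes_quad32_64 discharge the equality.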
procedure Loop6x_final(
        inline alg:algorithm,
        ghost iv_b:buffer128,
        ghost scratch_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost ctr_orig:quad32,
        ghost init:quad32_6,
        ghost ctrs:quad32_6,
        ghost plain:quad32_6,
        ghost inb:quad32)
    {:quick}
    {:public}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        inp @= rdi; outp @= rsi; key @= rcx; ivp @= r8;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15; constp @= r11;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
    reads
        key; rbp; ivp; memLayout; heap0; heap2;
    modifies
        inp; outp; constp; r12; r13;
        Ii; T1; T2; Hkey; Z1; Z2; Z3;
        inout0; inout1; inout2; inout3; inout4; inout5; rndkey;
        efl; heap3;
    requires
        sse_enabled;

        // Valid ptrs and buffers
        validSrcAddrs128(heap2, ivp, iv_b, 1, memLayout, Public);
        validDstAddrs128(heap3, rbp, scratch_b, 9, memLayout, Secret);
        inp + 0x60 < pow2_64;
        outp + 0x60 < pow2_64;

        // AES reqs
        // aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        init == map_six_of(ctrs, fun(c:quad32) quad32_xor(c, index(round_keys, 0)));
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) == rounds_opaque_6(init, round_keys, #nat(nr(alg) - 1));
        r13 == reverse_bytes_nat64(hi64(inb));
        r12 == reverse_bytes_nat64(lo64(inb));
        let rk := index(round_keys, #nat(nr(alg)));
        tuple(T2, Ii, Z1, Z2, Z3, Hkey) == map_six_of(plain, fun(p:quad32) quad32_xor(rk, p));
        //buffer128_read(iv_b, 0, heap2) == reverse_bytes_quad32(ctr_orig);
        buffer128_read(scratch_b, 8, heap3) == reverse_bytes_quad32(ctr_orig);
    ensures
        // Framing
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, 7, 7);

        // Semantics
        buffer128_read(scratch_b, 7, heap3) == reverse_bytes_quad32(inb);
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) ==
            map2_six_of(plain, ctrs, fun(p:quad32, c:quad32) quad32_xor(p, aes_encrypt_LE(alg, key_words, c)));
        rndkey == index(round_keys, 0);
        inp == old(inp) + 0x60;
        outp == old(outp) + 0x60;
        T2 == Mkfour(0, 0, 0, 0x1000000);
        T1 == old(buffer128_read(scratch_b, 8, heap3));
        let ctr := ctr_orig.lo0 % 256;
        ctr + 6 < 256 ==> tuple(T1, Ii, Z1, Z2, Z3, Hkey) == xor_reverse_inc32lite_6(0, 0, ctr_orig, rndkey);
{
    lemma_quad32_xor_commutes_forall();
    // Load128_buffer(heap2, T1, ivp, 0, Public, iv_b, 0);         // # load next counter value
    Load128_buffer(heap3, T1, rbp, 0x80, Secret, scratch_b, 8);    // # load next counter value
    VAESNI_enc_last(inout0, inout0, T2);
    load_one_msb();     // # borrow $T2, .Lone_msb
    VAESNI_enc_last(inout1, inout1, Ii);
    VPaddd(Ii, T1, T2);
    Store64_buffer128(heap3, rbp, r13, 7 * 16, Secret, false, scratch_b, 7);        // OpenSSL is further offset by 8 (to account for return addr?)
    AddLea64(inp, inp, 0x60);
    VAESNI_enc_last(inout2, inout2, Z1);
    VPaddd(Z1, Ii, T2);
    Store64_buffer128(heap3, rbp, r12, 7 * 16 + 8, Secret, true, scratch_b, 7);     // OpenSSL is further offset by 8 (to account for return addr?)
    AddLea64(outp, outp, 0x60);
    Load128_buffer(heap0, rndkey, key, 0x00 - 0x80, Secret, keys_b, 0);
    VAESNI_enc_last(inout3, inout3, Z2);
    VPaddd(Z2, Z1, T2);
    VAESNI_enc_last(inout4, inout4, Z3);
    VPaddd(Z3, Z2, T2);
    VAESNI_enc_last(inout5, inout5, Hkey);
    VPaddd(Hkey, Z3, T2);
    finish_cipher_opt(alg, ctrs._1, plain._1, init._1, old(inout0), inout0, round_keys);
    finish_cipher_opt(alg, ctrs._2, plain._2, init._2, old(inout1), inout1, round_keys);
    finish_cipher_opt(alg, ctrs._3, plain._3, init._3, old(inout2), inout2, round_keys);
    finish_cipher_opt(alg, ctrs._4, plain._4, init._4, old(inout3), inout3, round_keys);
    finish_cipher_opt(alg, ctrs._5, plain._5, init._5, old(inout4), inout4, round_keys);
    finish_cipher_opt(alg, ctrs._6, plain._6, init._6, old(inout5), inout5, round_keys);
    finish_aes_encrypt_le(alg, ctrs._1, key_words);
    finish_aes_encrypt_le(alg, ctrs._2, key_words);
    finish_aes_encrypt_le(alg, ctrs._3, key_words);
    finish_aes_encrypt_le(alg, ctrs._4, key_words);
    finish_aes_encrypt_le(alg, ctrs._5, key_words);
    finish_aes_encrypt_le(alg, ctrs._6, key_words);
    lemma_reverse_bytes_quad32_64(inb, old(buffer128_read(scratch_b, 7, heap3)), buffer128_read(scratch_b, 7, heap3));
    lemma_incr_msb(ctr_orig, T1, Ii, 1);
    lemma_incr_msb(ctr_orig, T1, Z1, 2);
    lemma_incr_msb(ctr_orig, T1, Z2, 3);
    lemma_incr_msb(ctr_orig, T1, Z3, 4);
    lemma_incr_msb(ctr_orig, T1, Hkey, 5);
}
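// CTR identity used above, as a sketch: the last round key rk was folded into the
// plaintext lanes (rk ^ p) before the VAESNI_enc_last calls, so each lane emerges as
//     p ^ aes_encrypt_LE(alg, key_words, c)
// for its counter block c; finish_cipher_opt and finish_aes_encrypt_le rewrite the
// instruction-level result into that specification form.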
procedure Loop6x_save_output(ghost count:nat, ghost out_b:buffer128)
    {:quick}
    {:public}
    lets
        outp @= rsi;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
    reads
        outp; Ii; T1; T2; Hkey; Z1; Z2; Z3; rndkey; memLayout;
    modifies
        inout0; inout1; inout2; inout3; inout4; inout5;
        heap6; efl;
    requires
        // Valid ptrs and buffers
        avx_enabled && sse_enabled;
        validDstAddrsOffset128(heap6, outp - 0x60, out_b, count*6, 6, memLayout, Secret);
    ensures
        modifies_buffer_specific128(out_b, old(heap6), heap6, count*6+0, count*6+5);
        slice(buffer128_as_seq(heap6, out_b), 0, 6 * count) == old(slice(buffer128_as_seq(heap6, out_b), 0, 6 * count));
        old(tuple(inout0, inout1, inout2, inout3, inout4, inout5)) ==
            make_six_of(fun(i:int_range(0, 5)) buffer128_read(out_b, count * 6 + i, heap6));
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) ==
            tuple(quad32_xor(T1, rndkey), Ii, Z1, Z2, Z3, Hkey);
{
    Store128_buffer(heap6, outp, inout0, 0-0x60, Secret, out_b, count*6 + 0);
    VPxor(inout0, T1, rndkey);
    Store128_buffer(heap6, outp, inout1, 0-0x50, Secret, out_b, count*6 + 1);
    Mov128(inout1, Ii);
    Store128_buffer(heap6, outp, inout2, 0-0x40, Secret, out_b, count*6 + 2);
    Mov128(inout2, Z1);
    Store128_buffer(heap6, outp, inout3, 0-0x30, Secret, out_b, count*6 + 3);
    Mov128(inout3, Z2);
    Store128_buffer(heap6, outp, inout4, 0-0x20, Secret, out_b, count*6 + 4);
    Mov128(inout4, Z3);
    Store128_buffer(heap6, outp, inout5, 0-0x10, Secret, out_b, count*6 + 5);
    Mov128(inout5, Hkey);
}
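// Pipelining note (a sketch of the apparent intent): each Store128_buffer retires one
// ciphertext block of the batch just finished while the interleaved VPxor/Mov128 lines
// reload inout0..inout5 with the next batch's counter blocks, so the following Loop6x
// iteration can begin its AES rounds without waiting for the stores.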
procedure Loop6x_partial(
        inline alg:algorithm,
        ghost h_LE:quad32,
        ghost y_prev:quad32,
        ghost data:seq(quad32),
        ghost count:nat,        // Number of 6x128-bit blocks processed so far
        ghost in0_count:nat,
        ghost iv_b:buffer128,
        ghost in0_b:buffer128,
        ghost in_b:buffer128,
        ghost scratch_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost hkeys_b:buffer128,
        //ghost ctr_BE_orig:quad32,
        ghost ctr_BE:quad32)
    returns(ghost init:quad32_6)
    {:public}
    {:quick}
    {:transform reorder, Loop6x_partial_expected_code}
    {:options z3rlimit(50), max_ifuel(0)}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        inp @= rdi; key @= rcx; ivp @= r8; Xip @= r9;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z0 @= xmm4; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7; Xi @= xmm8;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        counter @= rbx; constp @= r11; in0 @= r14;
        h := of_quad32(reverse_bytes_quad32(h_LE));
        prev := of_quad32(reverse_bytes_quad32(y_prev));
    reads
        inp; key; ivp; Xip; rbp; in0; memLayout; heap0; heap6;
    modifies
        counter; constp; r12; r13;
        Ii; T1; T2; Hkey; Z0; Z1; Z2; Z3; Xi;
        inout0; inout1; inout2; inout3; inout4; inout5; rndkey;
        heap2; heap3; efl;
    requires
        sse_enabled && movbe_enabled;
        T2 == Mkfour(0, 0, 0, 0x1000000);

        // Valid ptrs and buffers
        validDstAddrs128(heap2, ivp, iv_b, 1, memLayout, Public);
        validSrcAddrsOffset128(heap6, in0, in0_b, in0_count * 6, 6, memLayout, Secret);
        validSrcAddrsOffset128(heap6, inp, in_b, count * 6, 6, memLayout, Secret);
        validDstAddrs128(heap3, rbp, scratch_b, 9, memLayout, Secret);
        inp + 0x60 < pow2_64;

        // AES reqs
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        // aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        rndkey == index(round_keys, 0);

        // Counter requirements
        T2 == Mkfour(0, 0, 0, 0x1000000);
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 0));
        counter == ctr_BE.lo0 % 256;
        inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 0)), rndkey);
        counter + 6 < 256 ==>
            tuple(inout0, inout1, inout2, inout3, inout4, inout5) == xor_reverse_inc32lite_6(1, 0, ctr_BE, rndkey);

        length(data) == 6;
        hkeys_b_powers(hkeys_b, heap0, memLayout, Xip - 0x20, h);
        scratch_b_data(true, true, scratch_b, 8, 5, heap3, memLayout, rbp, data);
        Z3 == reverse_bytes_quad32(data[5]);
        of_quad32(Xi) +. of_quad32(Z0) +. of_quad32(buffer128_read(scratch_b, 1, heap3)) == prev;
        // rbp + 0x10 * 8 <= pow2_64;
    ensures
        // Framing
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, 1, 8);
        modifies_buffer_specific128(iv_b, old(heap2), heap2, 0, 0);

        // Semantics
        // Counters
        0 <= counter < 256;
        counter == inc32lite(ctr_BE, 6).lo0 % 256;

        // Encryption results
        tuple(inout0, inout1, inout2, inout3, inout4, inout5) == rounds_opaque_6(init, round_keys, #nat(nr(alg) - 1));
        r13 == reverse_bytes_nat64(hi64(buffer128_read(in0_b, in0_count * 6 + 0, heap6)));
        r12 == reverse_bytes_nat64(lo64(buffer128_read(in0_b, in0_count * 6 + 0, heap6)));
        let rk := index(round_keys, #nat(nr(alg)));
        tuple(T2, Ii, Z1, Z2, Z3, Hkey) == make_six_of(fun(i:int_range(0, 5))
            quad32_xor(rk, buffer128_read(in_b, count * 6 + i, heap6)));
        buffer128_read(scratch_b, 8, heap3) == reverse_bytes_quad32(inc32lite(ctr_BE, 6));
        //buffer128_read(iv_b, 0, heap2) == reverse_bytes_quad32(inc32lite(ctr_BE, 6));

        // Byte reversals for use by GCM
        buffer128_read(scratch_b, 2, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, in0_count * 6 + 5, heap6)));
        buffer128_read(scratch_b, 3, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, in0_count * 6 + 4, heap6)));
        buffer128_read(scratch_b, 4, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, in0_count * 6 + 3, heap6)));
        buffer128_read(scratch_b, 5, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, in0_count * 6 + 2, heap6)));
        buffer128_read(scratch_b, 6, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, in0_count * 6 + 1, heap6)));

        init == make_six_of(fun (n:int_range(0, 5)) quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, n)), index(round_keys, 0)));
        let eventual_Xi := of_quad32(Xi) +. of_quad32(buffer128_read(scratch_b, 1, heap3)) +. of_quad32(Z0);
        eventual_Xi == of_quad32(reverse_bytes_quad32(ghash_incremental(h_LE, y_prev, data)));
{
    init := make_six_of(fun (n:int_range(0, 5)) quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, n)), rndkey));
    let start := in0_count * 6;
    Loop6x_preamble(alg, h_LE, iv_b, scratch_b, key_words, round_keys, keys_b, hkeys_b, ctr_BE);
    Loop6x_plain(alg, 0, key_words, round_keys, keys_b, init, T2);
    MulAdd_step(0, 0, T1, Z1, Z2, Z3, Z3, Hkey, hkeys_b, scratch_b, h, prev, data);
    MulAdd_step(1, 1, Z1, T1, T2, Hkey, Ii, Hkey, hkeys_b, scratch_b, h, prev, data);
    Loop6x_plain(alg, 1, key_words, round_keys, keys_b, init, rndkey);
    Loop6x_reverse128(5, 2, start, in0_b, scratch_b);
    MulAdd_step(2, 3, T1, T2, Hkey, Z1, Ii, Z1, hkeys_b, scratch_b, h, prev, data);
    Loop6x_plain(alg, 2, key_words, round_keys, keys_b, init, rndkey);
    MulAdd_step(3, 4, T2, Hkey, Z1, T1, Ii, T1, hkeys_b, scratch_b, h, prev, data);
    Loop6x_plain(alg, 3, key_words, round_keys, keys_b, init, rndkey);
    Loop6x_reverse128(4, 3, start, in0_b, scratch_b);
    MulAdd_step(4, 6, Hkey, Z1, T1, T2, Ii, T2, hkeys_b, scratch_b, h, prev, data);
    Loop6x_plain(alg, 4, key_words, round_keys, keys_b, init, rndkey);
    Loop6x_reverse128(3, 4, start, in0_b, scratch_b);
    MulAdd_step(5, 7, T2, Z1, T1, Xi, Xi, Hkey, hkeys_b, scratch_b, h, prev, data);
    Loop6x_plain(alg, 5, key_words, round_keys, keys_b, init, rndkey);
    Loop6x_reverse128(2, 5, start, in0_b, scratch_b);
    Load_0xc2_msb(Hkey);
    // ZeroXmm(Hkey);
    // PinsrqImm(Hkey, 0xc200000000000000, 1, constp);
    /* XXX[jb]: Changed from PinsrdImm(Hkey, 0xc2000000, 3, r12). */
    ReduceLast(false, h_LE, y_prev, data);
    Loop6x_plain(alg, 6, key_words, round_keys, keys_b, init, rndkey);
    Loop6x_reverse128(1, 6, start, in0_b, scratch_b);
    Loop6x_plain(alg, 7, key_words, round_keys, keys_b, init, T1);
    LoadBe64_buffer128(heap6, r13, in0, 0 * 16 + 8, Secret, true, in0_b, in0_count * 6 + 0);
    LoadBe64_buffer128(heap6, r12, in0, 0 * 16, Secret, false, in0_b, in0_count * 6 + 0);
    Loop6x_plain(alg, 8, key_words, round_keys, keys_b, init, rndkey);
    /* XXX[jb] Moved from Loop6x_round9 */
    inline if (alg = AES_256) {
        /* XXX[jb]: Note that this is placed _after_ round8, unlike what existed in the older
           AESGCM.vaf. Thus, this code couldn't _directly_ be copied over from what existed in
           AESGCM.vaf. Instead, I've reordered it, so that this is _strictly_ the last few
           rounds, rather than being the (n-1)..(n-x)th rounds. Additionally, I've performed
           _some_ cleaning up of the code, to reuse things we already have in this file :) */
        /* XXX[jb]: This chunk of code could be made part of Loop6x_plain.
           The only difference is rndkey<-->T1. */
        Load128_buffer(heap0, T1, key, 0xa0-0x80, Secret, keys_b, 10);
        VAESNI_enc(inout0, inout0, T1);
        VAESNI_enc(inout1, inout1, T1);
        VAESNI_enc(inout2, inout2, T1);
        VAESNI_enc(inout3, inout3, T1);
        VAESNI_enc(inout4, inout4, T1);
        VAESNI_enc(inout5, inout5, T1);
        eval_rounds_reveal();
        commute_sub_bytes_shift_rows_forall();
        Loop6x_plain(alg, 10, key_words, round_keys, keys_b, init, rndkey);
        /* XXX[jb]: This chunk of code could be made part of Loop6x_plain.
           The only difference is rndkey<-->T1. */
        Load128_buffer(heap0, T1, key, 0xc0-0x80, Secret, keys_b, 12);
        VAESNI_enc(inout0, inout0, T1);
        VAESNI_enc(inout1, inout1, T1);
        VAESNI_enc(inout2, inout2, T1);
        VAESNI_enc(inout3, inout3, T1);
        VAESNI_enc(inout4, inout4, T1);
        VAESNI_enc(inout5, inout5, T1);
        Loop6x_plain(alg, 12, key_words, round_keys, keys_b, init, rndkey);
    }
    inline if (alg = AES_256) {
        Load128_buffer(heap0, T1, key, 0xe0-0x80, Secret, keys_b, 14);
    } else {
        Load128_buffer(heap0, T1, key, 0xa0-0x80, Secret, keys_b, 10);
    }
    Loop6x_round9(alg, count, in_b, scratch_b, key_words, round_keys, keys_b);
}

#verbatim{:interface}
#reset-options "--z3rlimit 50"
#endverbatim
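// Scheduling sketch (illustrative): one Loop6x_partial interleaves three streams over
// shared registers: the AES rounds for blocks count*6 .. count*6+5 (Loop6x_plain), the
// six MulAdd_step multiply-accumulates that GHash the previous iteration's six blocks in
// `data`, and the Loop6x_reverse128 byte-reversals that stage the next GHASH inputs on
// the stack, keeping the AESNI and PCLMULQDQ units busy simultaneously.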
procedure Loop6x(
        inline alg:algorithm,
        ghost h_LE:quad32,
        ghost y_orig:quad32,
        ghost y_prev:quad32,
        ghost count:nat,        // Number of 6x128-bit blocks processed so far
        ghost iv_b:buffer128,
        ghost in0_b:buffer128,
        ghost in_b:buffer128,
        ghost out_b:buffer128,
        ghost scratch_b:buffer128,
        ghost plain_quads:seq(quad32),
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost hkeys_b:buffer128,
        ghost ctr_BE_orig:quad32,
        ghost ctr_BE:quad32)
    returns(ghost y_new:quad32)
    {:public}
    {:quick}
    {:options z3rlimit(100)}
    lets
        inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z0 @= xmm4; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7; Xi @= xmm8;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        counter @= rbx; constp @= r11; in0 @= r14;
        // h := of_quad32(reverse_bytes_quad32(h_LE));
    reads
        rbp; key; ivp; Xip; memLayout; heap0;
    modifies
        inp; outp; len; counter; constp; r12; r13; in0;
        Ii; T1; T2; Hkey; Z0; Z1; Z2; Z3; Xi;
        inout0; inout1; inout2; inout3; inout4; inout5; rndkey;
        heap6; heap2; heap3; efl;
    requires
        sse_enabled && movbe_enabled;
        count >= 2;
        len >= 6;
        T2 == Mkfour(0, 0, 0, 0x1000000);

        // Valid ptrs and buffers
        validDstAddrs128(heap2, ivp, iv_b, 1, memLayout, Public);
        validSrcAddrsOffset128(heap6, in0, in0_b, (count - 1) * 6, 6, memLayout, Secret);
        validSrcAddrsOffset128(heap6, inp, in_b, count * 6, 6, memLayout, Secret);
        validDstAddrsOffset128(heap6, outp, out_b, count * 6, 6, memLayout, Secret);
        validDstAddrs128(heap3, rbp, scratch_b, 9, memLayout, Secret);
        validSrcAddrs128(heap0, Xip - 0x20, hkeys_b, 8, memLayout, Secret);
        partial_seq_agreement(plain_quads, s128(heap6, in_b), count * 6, count * 6 + 6);
        buffers_disjoint128(in_b, out_b) || in_b == out_b;
        in0_b == out_b;
        inp + 0x60 < pow2_64;
        in0 + 0x60 < pow2_64;
        outp + 0x60 < pow2_64;

        // AES reqs
        aes_reqs_offset(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        rndkey == index(round_keys, 0);

        // GHash reqs
        pclmulqdq_enabled;
        h_LE == aes_encrypt_LE(alg, key_words, Mkfour(0, 0, 0, 0));
        hkeys_reqs_priv(s128(heap0, hkeys_b), reverse_bytes_quad32(h_LE));
        scratch_reqs(scratch_b, #nat(count - 2), heap3, s128(heap6, in0_b), Z3);
        y_prev == ghash_incremental0(h_LE, y_orig, slice(s128(heap6, in0_b), 0, #nat(count - 2) * 6));
        y_prev == reverse_bytes_quad32(to_quad32(
            of_quad32(Xi) +. of_quad32(Z0) +. of_quad32(buffer128_read(scratch_b, 1, heap3))));
        // rbp + 0x10 * 8 <= pow2_64;

        // Counter requirements
        count * 6 + 6 < pow2_32;
        ctr_BE == inc32lite(ctr_BE_orig, count * 6);
        T2 == Mkfour(0, 0, 0, 0x1000000);
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 0));
        counter == ctr_BE.lo0 % 256;
        inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 0)), rndkey);
        counter + 6 < 256 ==>
            tuple(inout0, inout1, inout2, inout3, inout4, inout5) == xor_reverse_inc32lite_6(1, 0, ctr_BE, rndkey);

        // GCTR progress
        gctr_partial(alg, 6 * count, plain_quads, s128(heap6, out_b), key_words, ctr_BE_orig);
    ensures
        // Framing
        len > 0 ==> modifies_buffer_specific128(out_b, old(heap6), heap6, count * 6 + 0, count * 6 + 5);
        len == 0 ==> heap6 == old(heap6);
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, 1, 8);
        modifies_buffer_specific128(iv_b, old(heap2), heap2, 0, 0);

        // Pointer tracking
        len == old(len) - 6;
        inp == old(inp) + 0x60;
        in0 == old(in0) + 0x60;
        outp == old(outp) + 0x60;
        rndkey == index(round_keys, 0);

        // Counters
        T2 == Mkfour(0, 0, 0, 0x1000000);
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 6));
        let z3' := (if len = 0 then Z3 else reverse_bytes_quad32(inc32lite(ctr_BE, 10)));
        counter + 6 < 256 ==> tuple(T1, Ii, Z1, Z2, z3', Hkey) == xor_reverse_inc32lite_6(0, 6, ctr_BE, rndkey);
        counter == inc32lite(ctr_BE, 6).lo0 % 256;

        // Encryption results
        len == 0 ==> gctr_registers(inout0, inout1, inout2, inout3, inout4, inout5,
            plain_quads, alg, key_words, ctr_BE_orig, count);
        let inout4' := (if counter + 6 < 256 then reverse_bytes_quad32(inc32lite(ctr_BE, 10)) else inout4);
        len > 0 ==> tuple(inout0, inout1, inout2, inout3, inout4, inout5) ==
            tuple(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 6)), rndkey), Ii, Z1, Z2, inout4', Hkey);
        len > 0 ==> make_six_of(fun(i:int_range(0, 5)) buffer128_read(out_b, count * 6 + i, heap6)) ==
            make_six_of(fun(i:int_range(0, 5)) quad32_xor(
                old(buffer128_read(in_b, count * 6 + i, heap6)),
                aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE, i))));

        // GCTR progress
        len > 0 ==> gctr_partial(alg, 6 * (count + 1), plain_quads, s128(heap6, out_b), key_words, ctr_BE_orig);

        // GHash progress
        y_new == ghash_incremental0(h_LE, y_orig, slice(s128(heap6, in0_b), 0, #nat(count - 1) * 6));
        len > 0 ==> y_new == reverse_bytes_quad32(to_quad32(
            of_quad32(Xi) +. of_quad32(Z0) +. of_quad32(buffer128_read(scratch_b, 1, heap3))));
        len == 0 ==> Xi == reverse_bytes_quad32(y_new);
        len > 0 ==> scratch_reqs(scratch_b, #nat(count - 1), heap3, old(s128(heap6, in0_b)), Z3);
        len == 0 ==> scratch_reqs(scratch_b, #nat(count - 1), heap3, old(s128(heap6, in0_b)),
            buffer128_read(scratch_b, 2, heap3));

        // // Byte reversals for use by GCM
        // buffer128_read(scratch_b, 2, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, (count-2)*6 + 5, heap6)));
        // buffer128_read(scratch_b, 3, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, (count-2)*6 + 4, heap6)));
        // buffer128_read(scratch_b, 4, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, (count-2)*6 + 3, heap6)));
        // buffer128_read(scratch_b, 5, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, (count-2)*6 + 2, heap6)));
        // buffer128_read(scratch_b, 6, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, (count-2)*6 + 1, heap6)));
        // buffer128_read(scratch_b, 7, heap3) == old(reverse_bytes_quad32(buffer128_read(in0_b, (count-2)*6 + 0, heap6)));
{
    let prev := of_quad32(Xi) +. of_quad32(Z0) +. of_quad32(buffer128_read(scratch_b, 1, heap3));
    let y_prev := reverse_bytes_quad32(to_quad32(prev));
    let data := Seq.slice(s128(heap6, in0_b), #nat(count - 2) * 6, #nat(count - 2) * 6 + 6);
    lemma_of_to_quad32(prev);

    let init := Loop6x_partial(alg, h_LE, y_prev, data, count, #nat(count - 1), iv_b, in0_b, in_b,
        scratch_b, key_words, round_keys, keys_b, hkeys_b, ctr_BE);

    let ctrs := make_six_of(fun(i:int_range(0, 5)) reverse_bytes_quad32(inc32lite(ctr_BE, i)));
    let plains := make_six_of(fun(i:int_range(0, 5)) buffer128_read(in_b, count * 6 + i, heap6));
    Loop6x_final(alg, iv_b, scratch_b, key_words, round_keys, keys_b, inc32lite(ctr_BE, 6),
        init, ctrs, plains, buffer128_read(in0_b, (count - 1) * 6 + 0, heap6));

    Sub64(len, 6);
    Add64(in0, 0x60);

    y_new := ghash_incremental0(h_LE, y_prev, data);
    let mem_snap := heap6;
    lemma_ghash_incremental0_append(h_LE, y_orig, y_prev, y_new,
        slice(s128(heap6, in0_b), 0, #nat(count - 2) * 6), data);
    assert y_new == ghash_incremental0(h_LE, y_orig,
        append(slice(s128(heap6, in0_b), 0, #nat(count - 2) * 6), data));
    assert equal(
        append(slice(s128(heap6, in0_b), 0, #nat(count - 2) * 6), data),
        slice(s128(heap6, in0_b), 0, #nat(count - 1) * 6));
    assert y_new == ghash_incremental0(h_LE, y_orig, slice(s128(heap6, in0_b), 0, #nat(count - 1) * 6));

    if (len > 0) {
        Loop6x_save_output(count, out_b);
        assert equal(
            slice(s128(heap6, out_b), 0, #nat(count - 1) * 6),
            slice(s128(mem_snap, out_b), 0, #nat(count - 1) * 6));
        Load128_buffer(heap3, Z3, rbp, 0x20, Secret, scratch_b, 2);     // # I[5]
        let plain := old(s128(heap6, in_b));
        let cipher := s128(heap6, out_b);
        let bound := count * 6;
        gctr_partial_opaque_ignores_postfix(
            alg, #nat32(bound), plain_quads, plain_quads,
            old(buffer128_as_seq(heap6, out_b)), cipher, key_words, ctr_BE_orig);
        gctr_partial_extend6(alg, bound, plain_quads, cipher, key_words, ctr_BE_orig);
        lemma_add_manip(of_quad32(Xi), of_quad32(buffer128_read(scratch_b, 1, heap3)), of_quad32(Z0));
        assert y_new == reverse_bytes_quad32(to_quad32(of_quad32(reverse_bytes_quad32(
            ghash_incremental(h_LE, y_prev, data)))));
    } else {
        VPolyAdd(Xi, Xi, Mem128(heap3, rbp, 0x10, Secret, scratch_b, 1));
        VPolyAdd(Xi, Xi, Z0);
        assert of_quad32(Xi) == of_quad32(reverse_bytes_quad32(ghash_incremental(h_LE, y_prev, data)));
        assert to_quad32(of_quad32(Xi)) == to_quad32(of_quad32(reverse_bytes_quad32(ghash_incremental(h_LE, y_prev, data))));
        assert Xi == reverse_bytes_quad32(ghash_incremental(h_LE, y_prev, data));
    }
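    // (sketch) gctr_registers_reveal exposes the definition of gctr_registers so that, in
    // the len == 0 case, the per-register XOR equations from Loop6x_final line up with the
    // gctr_registers postcondition.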
gctr_registers_reveal(); } /* #reset-options "--z3rlimit 600" procedure Loop6x_loop( inline alg:algorithm, ghost count:nat, // Number of 6x128-bit blocks processed so far ghost iv_b:buffer128, ghost in0_b:buffer128, ghost in_b:buffer128, ghost out_b:buffer128, ghost scratch_b:buffer128, ghost plain_quads:seq(quad32), ghost key_words:seq(nat32), ghost round_keys:seq(quad32), ghost keys_b:buffer128, ghost ctr_BE_orig:quad32, ghost ctr_BE:quad32) {:quick exportOnly} lets // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9; inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7; inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14; rndkey @= xmm15; // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15; counter @= rbx; constp @= r11; in0 @= r14; reads rax; rbp; r9; r10; r15; xmm4; xmm8; key; ivp; rsp; memLayout; modifies inp; outp; len; counter; constp; r12; r13; in0; Ii; T1; T2; Hkey; Z1; Z2; Z3; inout0; inout1; inout2; inout3; inout4; inout5; rndkey; heap0; efl; requires this.vs_ok; count >= 2; 6 <= len && count*6 + len + 6 < pow2_32; len % 6 == 0; T2 == Mkfour(0, 0, 0, 0x1000000); // Valid ptrs and buffers validDstAddrs128(heap0, ivp, iv_b, 1, memLayout, Secret); validSrcAddrsOffset128(heap6, in0, in0_b, (count-2)*6, len, memLayout, Secret); validSrcAddrsOffset128(heap6, inp, in_b, count*6, len, memLayout, Secret); validDstAddrsOffset128(heap6, outp, out_b, count*6, len, memLayout, Secret); validDstAddrs128(heap3, rsp, scratch_b, 8, memLayout, Secret); buffers_disjoint128(iv_b, keys_b); buffers_disjoint128(iv_b, scratch_b); buffers_disjoint128(iv_b, in0_b); buffers_disjoint128(iv_b, in_b); buffers_disjoint128(iv_b, out_b); buffers_disjoint128(scratch_b, keys_b); buffers_disjoint128(scratch_b, in0_b); buffers_disjoint128(scratch_b, in_b); buffers_disjoint128(scratch_b, out_b); buffers_disjoint128(out_b, keys_b); buffers_disjoint128(in_b, out_b) || in_b == out_b; partial_seq_agreement(plain_quads, buffer128_as_seq(heap6, in_b), count*6, length(buffer128_as_seq(heap6, in_b))); inp + 0x10*len < pow2_64; in0 + 0x10*len < pow2_64; outp + 0x10*len < pow2_64; // AES reqs aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout); rndkey == index(round_keys, 0); // Counter requirements count*6 + 6 < pow2_32; ctr_BE == inc32lite(ctr_BE_orig, #nat32(count*6)); T2 == Mkfour(0, 0, 0, 0x1000000); T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 0)); counter == ctr_BE.lo0 % 256; inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 0)), rndkey); counter + 6 < 256 ==> inout1 == reverse_bytes_quad32(inc32lite(ctr_BE, 1)); counter + 6 < 256 ==> inout2 == reverse_bytes_quad32(inc32lite(ctr_BE, 2)); counter + 6 < 256 ==> inout3 == reverse_bytes_quad32(inc32lite(ctr_BE, 3)); counter + 6 < 256 ==> inout4 == reverse_bytes_quad32(inc32lite(ctr_BE, 4)); counter + 6 < 256 ==> inout5 == reverse_bytes_quad32(inc32lite(ctr_BE, 5)); // GCTR progress gctr_partial(alg, 6*count, plain_quads, buffer128_as_seq(heap6, out_b), key_words, ctr_BE_orig); ensures this.vs_ok; // Framing modifies_buffer128_3(scratch_b, out_b, iv_b, old(heap0), heap0); buffer_modifies_specific128(scratch_b, old(heap3), heap3, 2, 7); buffer_modifies_specific128(out_b, old(heap6), heap6, count*6, #nat(count*6+old(len)-1)); buffer_modifies_specific128(iv_b, old(heap2), heap2, 0, 0); rax == old(rax); rcx == old(rcx); rsp == old(rsp); rbp == old(rbp); r8 == 
old(r8); r9 == old(r9); r10 == old(r10); r15 == old(r15); xmm4 == old(xmm4); xmm8 == old(xmm8); // Semantics // Pointers in0 == old( in0) + 0x10*old(len); inp == old( inp) + 0x10*old(len); outp == old(outp) + 0x10*old(len); gctr_partial(alg, #nat(6*count + old(len) - 6), plain_quads, buffer128_as_seq(heap6, out_b), key_words, ctr_BE_orig); inout0 == quad32_xor(old(buffer128_read(in_b, 6*count+old(len)-6 + 0, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + old(len) - 6)))); inout1 == quad32_xor(old(buffer128_read(in_b, 6*count+old(len)-6 + 1, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + old(len) - 6 + 1)))); inout2 == quad32_xor(old(buffer128_read(in_b, 6*count+old(len)-6 + 2, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + old(len) - 6 + 2)))); inout3 == quad32_xor(old(buffer128_read(in_b, 6*count+old(len)-6 + 3, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + old(len) - 6 + 3)))); inout4 == quad32_xor(old(buffer128_read(in_b, 6*count+old(len)-6 + 4, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + old(len) - 6 + 4)))); inout5 == quad32_xor(old(buffer128_read(in_b, 6*count+old(len)-6 + 5, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + old(len) - 6 + 5)))); { ghost var iter:nat := 0; ghost var ctr := ctr_BE; while (len > 0) invariant // Manual framing this.vs_ok; rax == old(rax); rcx == old(rcx); rsp == old(rsp); rbp == old(rbp); r8 == old(r8); r9 == old(r9); r10 == old(r10); r15 == old(r15); xmm4 == old(xmm4); xmm8 == old(xmm8); len == old(len) - 6*iter; in0 == old( in0) + 0x60*iter; inp == old( inp) + 0x60*iter; outp == old(outp) + 0x60*iter; T2 == Mkfour(0, 0, 0, 0x1000000); // Valid ptrs and buffers validDstAddrs128(heap0, ivp, iv_b, 1, memLayout, Secret); len > 0 ==> validSrcAddrsOffset128(heap6, in0, in0_b, (count-2)*6 + iter*6, old(len) - iter*6, memLayout, Secret); len > 0 ==> validSrcAddrsOffset128(heap6, inp, in_b, count*6 + iter*6, old(len) - iter*6, memLayout, Secret); len > 0 ==> validDstAddrsOffset128(heap6, outp, out_b, count*6 + iter*6, old(len) - iter*6, memLayout, Secret); validDstAddrs128(heap3, rsp, scratch_b, 8, memLayout, Secret); buffers_disjoint128(iv_b, keys_b); buffers_disjoint128(iv_b, scratch_b); buffers_disjoint128(iv_b, in0_b); buffers_disjoint128(iv_b, in_b); buffers_disjoint128(iv_b, out_b); buffers_disjoint128(scratch_b, keys_b); buffers_disjoint128(scratch_b, in0_b); buffers_disjoint128(scratch_b, in_b); buffers_disjoint128(scratch_b, out_b); buffers_disjoint128(out_b, keys_b); buffers_disjoint128(in_b, out_b) || in_b == out_b; old(length(buffer128_as_seq(heap6, in_b))) >= 6; len > 0 ==> partial_seq_agreement(plain_quads, buffer128_as_seq(heap6, in_b), 6*count + 6*iter, old(length(buffer128_as_seq(heap6, in_b)))); len > 0 ==> inp + 0x10*len < pow2_64; len > 0 ==> in0 + 0x10*len < pow2_64; len > 0 ==> outp + 0x10*len < pow2_64; modifies_buffer128_3(scratch_b, out_b, iv_b, old(heap0), heap0); buffer_modifies_specific128(scratch_b, old(heap3), heap3, 2, 7); buffer_modifies_specific128(out_b, old(heap6), heap6, count*6, count*6+iter*6+5); len == 0 ==> old(len) >= 6 && buffer_modifies_specific128(out_b, old(heap6), heap6, count*6, #nat(count*6+old(len)-1)); buffer_modifies_specific128(iv_b, old(heap2), heap2, 0, 0); // AES reqs aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout); rndkey == index(round_keys, 0); // Counter requirements len % 6 == 0; len < 
pow2_32; 6 <= old(len) && count*6 + old(len) + 6 < pow2_32; count*6 + iter*6 + 6 < pow2_32; len > 0 ==> len >= 6; ctr == inc32lite(ctr_BE_orig, #nat32(6*count + iter*6)); T2 == Mkfour(0, 0, 0, 0x1000000); T1 == reverse_bytes_quad32(inc32lite(ctr, 0)); counter == ctr.lo0 % 256; len > 0 ==> inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr, 0)), rndkey); len > 0 ==> counter + 6 < 256 ==> inout1 == reverse_bytes_quad32(inc32lite(ctr, 1)); len > 0 ==> counter + 6 < 256 ==> inout2 == reverse_bytes_quad32(inc32lite(ctr, 2)); len > 0 ==> counter + 6 < 256 ==> inout3 == reverse_bytes_quad32(inc32lite(ctr, 3)); len > 0 ==> counter + 6 < 256 ==> inout4 == reverse_bytes_quad32(inc32lite(ctr, 4)); len > 0 ==> counter + 6 < 256 ==> inout5 == reverse_bytes_quad32(inc32lite(ctr, 5)); len == 0 ==> inout0 == quad32_xor(old(buffer128_read(in_b, (count+iter-1)*6 + 0, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + 6*(iter-1))))); len == 0 ==> inout1 == quad32_xor(old(buffer128_read(in_b, (count+iter-1)*6 + 1, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + 6*(iter-1) + 1)))); len == 0 ==> inout2 == quad32_xor(old(buffer128_read(in_b, (count+iter-1)*6 + 2, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + 6*(iter-1) + 2)))); len == 0 ==> inout3 == quad32_xor(old(buffer128_read(in_b, (count+iter-1)*6 + 3, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + 6*(iter-1) + 3)))); len == 0 ==> inout4 == quad32_xor(old(buffer128_read(in_b, (count+iter-1)*6 + 4, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + 6*(iter-1) + 4)))); len == 0 ==> inout5 == quad32_xor(old(buffer128_read(in_b, (count+iter-1)*6 + 5, heap6)), aes_encrypt_BE(alg, key_words, inc32lite(ctr_BE_orig, #nat32(6*count + 6*(iter-1) + 5)))); // GCTR progress len > 0 ==> gctr_partial(alg, 6*count + 6*iter, plain_quads, buffer128_as_seq(heap6, out_b), key_words, ctr_BE_orig); len == 0 ==> gctr_partial(alg, #nat(6*count + 6*(iter-1)), plain_quads, buffer128_as_seq(heap6, out_b), key_words, ctr_BE_orig); decreases len; { Loop6x(alg, count+iter, iv_b, in0_b, in_b, out_b, scratch_b, plain_quads, key_words, round_keys, keys_b, ctr_BE_orig, ctr); iter := #nat(iter + 1); ctr := inc32(ctr, 6); } } #reset-options "--z3rlimit 40" procedure AESNI_ctr32_6x_preamble( inline alg:algorithm, // OpenSSL includes the number of rounds (nr) as a dynamic parameter (stored with the key). Saves code space but adds extra instructions to the fast path. 
Maybe branch predictor is good enough for it not to matter ghost key_words:seq(nat32), ghost round_keys:seq(quad32), ghost keys_b:buffer128, ghost ctr_orig:quad32) {:quick} lets // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9; key @= rcx; T1 @= xmm1; T2 @= xmm2; Z0 @= xmm4; inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14; rndkey @= xmm15; // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15; constp @= r11; reads key; heap0; memLayout; modifies constp; r12; T1; T2; Z0; inout0; inout1; inout2; inout3; inout4; inout5; rndkey; efl; requires // AES reqs aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout); key - 0x60 >= 0; ctr_orig.lo0 % 256 + 6 < 256; T1 == reverse_bytes_quad32(inc32lite(ctr_orig, 0)); ensures inout0 == eval_rounds(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 0)), index(round_keys, 0)), round_keys, 0); inout1 == eval_rounds(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 1)), index(round_keys, 0)), round_keys, 0); inout2 == eval_rounds(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 2)), index(round_keys, 0)), round_keys, 0); inout3 == eval_rounds(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 3)), index(round_keys, 0)), round_keys, 0); inout4 == eval_rounds(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 4)), index(round_keys, 0)), round_keys, 0); inout5 == eval_rounds(quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 5)), index(round_keys, 0)), round_keys, 0); T1 == reverse_bytes_quad32(inc32lite(ctr_orig, 6)); rndkey == index(round_keys, 1); { Load128_buffer(heap0, Z0, key, 0x00-0x80, Secret, keys_b, 0); // # borrow $Z0 for $rndkey load_one_msb(); // # borrow $T2, .Lone_msb Load128_buffer(heap0, rndkey, key, 0x10-0x80, Secret, keys_b, 1); //OpenSSL uses AddLea64(r12, key, 0x20-0x80) instead of the next two instructions Mov64(r12, key); Sub64(r12, 0x60); VPxor(inout0, T1, Z0); VPaddd(inout1, T1, T2); lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout1, 1); VPaddd(inout2, inout1, T2); lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout2, 2); VPxor(inout1, inout1, Z0); VPaddd(inout3, inout2, T2); lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout3, 3); VPxor(inout2, inout2, Z0); VPaddd(inout4, inout3, T2); lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout4, 4); VPxor(inout3, inout3, Z0); VPaddd(inout5, inout4, T2); lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout5, 5); VPxor(inout4, inout4, Z0); VPaddd(T1, inout5, T2); lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), T1, 6); VPxor(inout5, inout5, Z0); init_rounds_opaque(inout0, round_keys); init_rounds_opaque(inout1, round_keys); init_rounds_opaque(inout2, round_keys); init_rounds_opaque(inout3, round_keys); init_rounds_opaque(inout4, round_keys); init_rounds_opaque(inout5, round_keys); } #reset-options "" procedure AESNI_ctr32_6x_loop_body( inline alg:algorithm, inline rnd:nat, ghost key_words:seq(nat32), ghost round_keys:seq(quad32), ghost keys_b:buffer128, ghost init0:quad32, ghost init1:quad32, ghost init2:quad32, ghost init3:quad32, ghost init4:quad32, ghost init5:quad32) {:quick} lets key @= rcx; inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14; rndkey @= xmm15; reads key; heap0; memLayout; modifies inout0; inout1; inout2; inout3; inout4; inout5; rndkey; efl; requires // AES reqs aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout); rnd + 2 < length(round_keys); rndkey == index(round_keys, 
rnd+1); inout0 == eval_rounds(init0, round_keys, rnd); inout1 == eval_rounds(init1, round_keys, rnd); inout2 == eval_rounds(init2, round_keys, rnd); inout3 == eval_rounds(init3, round_keys, rnd); inout4 == eval_rounds(init4, round_keys, rnd); inout5 == eval_rounds(init5, round_keys, rnd); ensures inout0 == eval_rounds(init0, round_keys, rnd + 1); inout1 == eval_rounds(init1, round_keys, rnd + 1); inout2 == eval_rounds(init2, round_keys, rnd + 1); inout3 == eval_rounds(init3, round_keys, rnd + 1); inout4 == eval_rounds(init4, round_keys, rnd + 1); inout5 == eval_rounds(init5, round_keys, rnd + 1); rndkey == index(round_keys, rnd+2); { VAESNI_enc(inout0, inout0, rndkey); VAESNI_enc(inout1, inout1, rndkey); VAESNI_enc(inout2, inout2, rndkey); VAESNI_enc(inout3, inout3, rndkey); VAESNI_enc(inout4, inout4, rndkey); VAESNI_enc(inout5, inout5, rndkey); eval_rounds_reveal(); commute_sub_bytes_shift_rows_forall(); Load128_buffer(heap0, rndkey, key, 16*(rnd+2)-0x80, Secret, keys_b, rnd+2); } procedure AESNI_ctr32_6x_loop_recursive( inline alg:algorithm, inline rnd:nat, ghost key_words:seq(nat32), ghost round_keys:seq(quad32), ghost keys_b:buffer128, ghost init0:quad32, ghost init1:quad32, ghost init2:quad32, ghost init3:quad32, ghost init4:quad32, ghost init5:quad32) {:quick exportOnly} {:recursive} lets key @= rcx; inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14; rndkey @= xmm15; reads key; heap0; memLayout; modifies inout0; inout1; inout2; inout3; inout4; inout5; rndkey; efl; requires // AES reqs aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout); rnd + 2 < length(round_keys); rndkey == index(round_keys, 1); inout0 == eval_rounds(init0, round_keys, 0); inout1 == eval_rounds(init1, round_keys, 0); inout2 == eval_rounds(init2, round_keys, 0); inout3 == eval_rounds(init3, round_keys, 0); inout4 == eval_rounds(init4, round_keys, 0); inout5 == eval_rounds(init5, round_keys, 0); ensures inout0 == eval_rounds(init0, round_keys, rnd + 1); inout1 == eval_rounds(init1, round_keys, rnd + 1); inout2 == eval_rounds(init2, round_keys, rnd + 1); inout3 == eval_rounds(init3, round_keys, rnd + 1); inout4 == eval_rounds(init4, round_keys, rnd + 1); inout5 == eval_rounds(init5, round_keys, rnd + 1); rndkey == index(round_keys, rnd+2); { inline if (rnd > 0) { AESNI_ctr32_6x_loop_recursive(alg, #nat(rnd-1), key_words, round_keys, keys_b, init0, init1, init2, init3, init4, init5); } AESNI_ctr32_6x_loop_body(alg, rnd, key_words, round_keys, keys_b, init0, init1, init2, init3, init4, init5); } procedure AESNI_ctr32_6x_round9( inline alg:algorithm, ghost count:nat, ghost in_b:buffer128, ghost key_words:seq(nat32), ghost round_keys:seq(quad32), ghost keys_b:buffer128, ghost init0:quad32, ghost init1:quad32, ghost init2:quad32, ghost init3:quad32, ghost init4:quad32, ghost init5:quad32) {:quick} lets inp @= rdi; key @= rcx; T2 @= xmm2; Hkey @= xmm3; Z0 @= xmm4; Z1 @= xmm5; Z2 @= xmm6; Xi @= xmm8; inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11; inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14; rndkey @= xmm15; reads key; heap0; memLayout; modifies inp; T2; Hkey; Z0; Z1; Z2; Xi; inout0; inout1; inout2; inout3; inout4; inout5; rndkey; efl; requires validSrcAddrsOffset128(heap6, inp, in_b, count*6, 6, memLayout, Secret); inp + 0x60 < pow2_64; // AES reqs aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout); rndkey == index(round_keys, #nat(nr(alg)-1)); inout0 == eval_rounds(init0, round_keys, #nat(nr(alg)-2)); inout1 == eval_rounds(init1, 
        inout2 == eval_rounds(init2, round_keys, #nat(nr(alg)-2));
        inout3 == eval_rounds(init3, round_keys, #nat(nr(alg)-2));
        inout4 == eval_rounds(init4, round_keys, #nat(nr(alg)-2));
        inout5 == eval_rounds(init5, round_keys, #nat(nr(alg)-2));
    ensures
        inout0 == eval_rounds(init0, round_keys, #nat(nr(alg)-1));
        inout1 == eval_rounds(init1, round_keys, #nat(nr(alg)-1));
        inout2 == eval_rounds(init2, round_keys, #nat(nr(alg)-1));
        inout3 == eval_rounds(init3, round_keys, #nat(nr(alg)-1));
        inout4 == eval_rounds(init4, round_keys, #nat(nr(alg)-1));
        inout5 == eval_rounds(init5, round_keys, #nat(nr(alg)-1));
        let rk := index(round_keys, #nat(nr(alg)));
        Z0 == quad32_xor(rk, buffer128_read(in_b, count*6 + 0, heap6));
        Z1 == quad32_xor(rk, buffer128_read(in_b, count*6 + 1, heap6));
        Z2 == quad32_xor(rk, buffer128_read(in_b, count*6 + 2, heap6));
        Xi == quad32_xor(rk, buffer128_read(in_b, count*6 + 3, heap6));
        T2 == quad32_xor(rk, buffer128_read(in_b, count*6 + 4, heap6));
        Hkey == quad32_xor(rk, buffer128_read(in_b, count*6 + 5, heap6));
        inp == old(inp) + 0x60;
{
    Load128_buffer(heap0, Hkey, key, 0xa0-0x80, Secret, keys_b, 10); // # last round key
    VAESNI_enc(inout0, inout0, rndkey);
    VPxor(Z0, Hkey, Mem128(heap6, inp, 0x00, Secret, in_b, count*6 + 0));
    VAESNI_enc(inout1, inout1, rndkey);
    VPxor(Z1, Hkey, Mem128(heap6, inp, 0x10, Secret, in_b, count*6 + 1));
    VAESNI_enc(inout2, inout2, rndkey);
    VPxor(Z2, Hkey, Mem128(heap6, inp, 0x20, Secret, in_b, count*6 + 2));
    VAESNI_enc(inout3, inout3, rndkey);
    VPxor(Xi, Hkey, Mem128(heap6, inp, 0x30, Secret, in_b, count*6 + 3));
    VAESNI_enc(inout4, inout4, rndkey);
    VPxor(T2, Hkey, Mem128(heap6, inp, 0x40, Secret, in_b, count*6 + 4));
    VAESNI_enc(inout5, inout5, rndkey);
    VPxor(Hkey, Hkey, Mem128(heap6, inp, 0x50, Secret, in_b, count*6 + 5));
    eval_rounds_reveal();
    commute_sub_bytes_shift_rows_forall();
    AddLea64(inp, inp, 0x60);
}

procedure AESNI_ctr32_6x_final(
        inline alg:algorithm,
        ghost count:nat,
        ghost out_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost init0:quad32, ghost init1:quad32, ghost init2:quad32,
        ghost init3:quad32, ghost init4:quad32, ghost init5:quad32,
        ghost ctr0:quad32, ghost ctr1:quad32, ghost ctr2:quad32,
        ghost ctr3:quad32, ghost ctr4:quad32, ghost ctr5:quad32,
        ghost plain0:quad32, ghost plain1:quad32, ghost plain2:quad32,
        ghost plain3:quad32, ghost plain4:quad32, ghost plain5:quad32)
    {:quick}
    lets
        outp @= rsi; key @= rcx;
        T2 @= xmm2; Hkey @= xmm3; Z0 @= xmm4; Z1 @= xmm5; Z2 @= xmm6; Xi @= xmm8;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11;
        inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
    reads
        key; memLayout;
    modifies
        outp;
        T2; Hkey; Z0; Z1; Z2; Xi; inout0; inout1; inout2; inout3; inout4; inout5; rndkey;
        heap0; efl;
    requires
        validDstAddrsOffset128(heap6, outp, out_b, count*6, 6, memLayout, Secret);
        outp + 0x60 < pow2_64;
        // AES reqs
        aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        init0 == quad32_xor(ctr0, index(round_keys, 0));
        init1 == quad32_xor(ctr1, index(round_keys, 0));
        init2 == quad32_xor(ctr2, index(round_keys, 0));
        init3 == quad32_xor(ctr3, index(round_keys, 0));
        init4 == quad32_xor(ctr4, index(round_keys, 0));
        init5 == quad32_xor(ctr5, index(round_keys, 0));
        inout0 == eval_rounds(init0, round_keys, #nat(nr(alg)-1));
        inout1 == eval_rounds(init1, round_keys, #nat(nr(alg)-1));
        inout2 == eval_rounds(init2, round_keys, #nat(nr(alg)-1));
        inout3 == eval_rounds(init3, round_keys, #nat(nr(alg)-1));
        inout4 == eval_rounds(init4, round_keys, #nat(nr(alg)-1));
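        // Why the rk-xor-plaintext operands below make sense: aesenclast ends
        // with an AddRoundKey (a xor), so the extra xor with the plaintext
        // commutes out. A sketch of the algebra that finish_cipher_opt
        // discharges in the body (informal; relies on quad32_xor being
        // associative and commutative):
        //   VAESNI_enc_last(state, quad32_xor(rk, p))
        //     == quad32_xor(VAESNI_enc_last(state, rk), p)
        //     == quad32_xor(aes_encrypt_LE(alg, key_words, ctr), p)
        // i.e. each lane ends holding ciphertext, with no separate xor pass.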
        inout5 == eval_rounds(init5, round_keys, #nat(nr(alg)-1));
        let rk := index(round_keys, #nat(nr(alg)));
        Z0 == quad32_xor(rk, plain0);
        Z1 == quad32_xor(rk, plain1);
        Z2 == quad32_xor(rk, plain2);
        Xi == quad32_xor(rk, plain3);
        T2 == quad32_xor(rk, plain4);
        Hkey == quad32_xor(rk, plain5);
    ensures
        modifies_buffer_specific128(out_b, old(heap6), heap6, count*6+0, count*6+5);
        slice(buffer128_as_seq(heap6, out_b), 0, 6 * count) == old(slice(buffer128_as_seq(heap6, out_b), 0, 6 * count));
        outp == old(outp) + 0x60;
        buffer128_read(out_b, count*6 + 0, heap6) == inout0;
        buffer128_read(out_b, count*6 + 1, heap6) == inout1;
        buffer128_read(out_b, count*6 + 2, heap6) == inout2;
        buffer128_read(out_b, count*6 + 3, heap6) == inout3;
        buffer128_read(out_b, count*6 + 4, heap6) == inout4;
        buffer128_read(out_b, count*6 + 5, heap6) == inout5;
        inout0 == quad32_xor(plain0, aes_encrypt_LE(alg, key_words, ctr0));
        inout1 == quad32_xor(plain1, aes_encrypt_LE(alg, key_words, ctr1));
        inout2 == quad32_xor(plain2, aes_encrypt_LE(alg, key_words, ctr2));
        inout3 == quad32_xor(plain3, aes_encrypt_LE(alg, key_words, ctr3));
        inout4 == quad32_xor(plain4, aes_encrypt_LE(alg, key_words, ctr4));
        inout5 == quad32_xor(plain5, aes_encrypt_LE(alg, key_words, ctr5));
{
    VAESNI_enc_last(inout0, inout0, Z0);
    VAESNI_enc_last(inout1, inout1, Z1);
    VAESNI_enc_last(inout2, inout2, Z2);
    VAESNI_enc_last(inout3, inout3, Xi);
    VAESNI_enc_last(inout4, inout4, T2);
    VAESNI_enc_last(inout5, inout5, Hkey);
    Store128_buffer(heap6, outp, inout0, 0x00, Secret, out_b, count*6 + 0);
    Store128_buffer(heap6, outp, inout1, 0x10, Secret, out_b, count*6 + 1);
    Store128_buffer(heap6, outp, inout2, 0x20, Secret, out_b, count*6 + 2);
    Store128_buffer(heap6, outp, inout3, 0x30, Secret, out_b, count*6 + 3);
    Store128_buffer(heap6, outp, inout4, 0x40, Secret, out_b, count*6 + 4);
    Store128_buffer(heap6, outp, inout5, 0x50, Secret, out_b, count*6 + 5);
    AddLea64(outp, outp, 0x60);
    lemma_quad32_xor_commutes_forall();
    finish_cipher_opt(alg, ctr0, plain0, init0, old(inout0), inout0, round_keys);
    finish_cipher_opt(alg, ctr1, plain1, init1, old(inout1), inout1, round_keys);
    finish_cipher_opt(alg, ctr2, plain2, init2, old(inout2), inout2, round_keys);
    finish_cipher_opt(alg, ctr3, plain3, init3, old(inout3), inout3, round_keys);
    finish_cipher_opt(alg, ctr4, plain4, init4, old(inout4), inout4, round_keys);
    finish_cipher_opt(alg, ctr5, plain5, init5, old(inout5), inout5, round_keys);
    finish_aes_encrypt_le(alg, ctr0, key_words);
    finish_aes_encrypt_le(alg, ctr1, key_words);
    finish_aes_encrypt_le(alg, ctr2, key_words);
    finish_aes_encrypt_le(alg, ctr3, key_words);
    finish_aes_encrypt_le(alg, ctr4, key_words);
    finish_aes_encrypt_le(alg, ctr5, key_words);
}
#reset-options "--z3rlimit 20"
procedure AESNI_ctr32_6x(
        inline alg:algorithm,
        ghost count:nat,
        ghost in_b:buffer128,
        ghost out_b:buffer128,
        ghost plain_quads:seq(quad32),
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost ctr_BE:quad32,
        ghost ctr_BE_orig:quad32)
    {:quick}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        inp @= rdi; outp @= rsi; key @= rcx;
        T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3; Z0 @= xmm4; Z1 @= xmm5; Z2 @= xmm6; Xi @= xmm8;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11;
        inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        constp @= r11;
    reads
        key; memLayout;
    modifies
        inp; outp; constp; r12;
        T1; T2; Hkey; Z0; Z1; Z2; Xi; inout0; inout1; inout2; inout3; inout4; inout5; rndkey;
        heap0; efl;
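    // Round accounting for the composition in the body, worked out for
    // AES-128 (nr(AES_128) == 10, so 11 round keys):
    //   preamble          -> eval_rounds(..., 0), rndkey == rks[1]
    //   loop_recursive(7) -> rounds 1..8,         rndkey == rks[9]
    //   round9            -> round 9 (nr-1), plaintext pre-xored with rks[10]
    //   final             -> last round via VAESNI_enc_last, ciphertext stored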
    requires
        // Valid buffers and pointers
        validSrcAddrsOffset128(heap6, inp, in_b, count*6, 6, memLayout, Secret);
        validDstAddrsOffset128(heap6, outp, out_b, count*6, 6, memLayout, Secret);
        partial_seq_agreement(plain_quads, buffer128_as_seq(heap6, in_b), count*6, count*6+6);
        inp + 0x60 < pow2_64;
        outp + 0x60 < pow2_64;

        // AES reqs
        aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);
        key - 0x60 >= 0;

        // Counter requirements
        count*6 + 6 < pow2_32;
        ctr_BE == inc32lite(ctr_BE_orig, #nat32(count*6));
        ctr_BE.lo0 % 256 + 6 < 256;
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 0));

        // GCTR progress
        gctr_partial(alg, 6*count, plain_quads, buffer128_as_seq(heap6, out_b), key_words, ctr_BE_orig);
    ensures
        modifies_buffer_specific128(out_b, old(heap6), heap6, count*6+0, count*6+5);
        slice(buffer128_as_seq(heap6, out_b), 0, 6 * count) == old(slice(buffer128_as_seq(heap6, out_b), 0, 6 * count));
        inp == old(inp) + 0x60;
        outp == old(outp) + 0x60;
        T1 == reverse_bytes_quad32(inc32lite(ctr_BE, 6));
        buffer128_read(out_b, count*6 + 0, heap6) == inout0;
        buffer128_read(out_b, count*6 + 1, heap6) == inout1;
        buffer128_read(out_b, count*6 + 2, heap6) == inout2;
        buffer128_read(out_b, count*6 + 3, heap6) == inout3;
        buffer128_read(out_b, count*6 + 4, heap6) == inout4;
        buffer128_read(out_b, count*6 + 5, heap6) == inout5;

        // GCTR progress
        gctr_partial(alg, 6*(count+1), plain_quads, buffer128_as_seq(heap6, out_b), key_words, ctr_BE_orig);
{
    let init0 := quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 0)), index(round_keys, 0));
    let init1 := quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 1)), index(round_keys, 0));
    let init2 := quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 2)), index(round_keys, 0));
    let init3 := quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 3)), index(round_keys, 0));
    let init4 := quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 4)), index(round_keys, 0));
    let init5 := quad32_xor(reverse_bytes_quad32(inc32lite(ctr_BE, 5)), index(round_keys, 0));
    AESNI_ctr32_6x_preamble(alg, key_words, round_keys, keys_b, ctr_BE);
    AESNI_ctr32_6x_loop_recursive(alg, 7, key_words, round_keys, keys_b,
        init0, init1, init2, init3, init4, init5);
    AESNI_ctr32_6x_round9(alg, count, in_b, key_words, round_keys, keys_b,
        init0, init1, init2, init3, init4, init5);
    AESNI_ctr32_6x_final(alg, count, out_b, key_words, round_keys, keys_b,
        init0, init1, init2, init3, init4, init5,
        reverse_bytes_quad32(inc32lite(ctr_BE, 0)),
        reverse_bytes_quad32(inc32lite(ctr_BE, 1)),
        reverse_bytes_quad32(inc32lite(ctr_BE, 2)),
        reverse_bytes_quad32(inc32lite(ctr_BE, 3)),
        reverse_bytes_quad32(inc32lite(ctr_BE, 4)),
        reverse_bytes_quad32(inc32lite(ctr_BE, 5)),
        buffer128_read(in_b, count*6 + 0, heap6),
        buffer128_read(in_b, count*6 + 1, heap6),
        buffer128_read(in_b, count*6 + 2, heap6),
        buffer128_read(in_b, count*6 + 3, heap6),
        buffer128_read(in_b, count*6 + 4, heap6),
        buffer128_read(in_b, count*6 + 5, heap6));
    let plain := old(buffer128_as_seq(heap6, in_b));
    let cipher := buffer128_as_seq(heap6, out_b);
    let bound := count*6;
    gctr_partial_opaque_ignores_postfix(alg, #nat32(bound), plain_quads, plain_quads,
        old(buffer128_as_seq(heap6, out_b)), cipher, key_words, ctr_BE_orig);
    gctr_partial_extend6(alg, bound, plain_quads, cipher, key_words, ctr_BE_orig);
}
#reset-options "--z3rlimit 20"
procedure Ctr32_ghash_6_prelude(
        inline alg:algorithm,
        ghost scratch_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128,
        ghost ctr_orig:quad32)
    {:quick}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
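        // Register naming convention: the commented-out @= lines record the
        // full register assignment of the OpenSSL aesni-gcm-x86_64 code this
        // file appears to be ported from; the live @= lines below declare only
        // the aliases this procedure actually touches.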
        key @= rcx;
        T1 @= xmm1; T2 @= xmm2; Z0 @= xmm4;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11;
        inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        constp @= r11;
    reads
        key; rsp; T1; memLayout;
    modifies
        T2; Z0; inout0; inout1; inout2; inout3; inout4; inout5; rndkey; constp;
        heap0; efl;
    requires
        // Valid buffers and pointers
        validDstAddrs128(heap3, rsp, scratch_b, 8, memLayout, Secret);

        // AES reqs
        aes_reqs0(alg, key_words, round_keys, keys_b, key, heap0, memLayout);

        // Counters
        ctr_orig.lo0 % 256 + 6 < 256;
        T1 == reverse_bytes_quad32(inc32lite(ctr_orig, 0));
    ensures
        modifies_buffer_specific128(scratch_b, old(heap3), heap3, 1, 1);
        T2 == Mkfour(0, 0, 0, 0x1000000);
        rndkey == index(round_keys, 0);
        inout0 == quad32_xor(reverse_bytes_quad32(inc32lite(ctr_orig, 0)), rndkey);
        inout1 == reverse_bytes_quad32(inc32lite(ctr_orig, 1));
        inout2 == reverse_bytes_quad32(inc32lite(ctr_orig, 2));
        inout3 == reverse_bytes_quad32(inc32lite(ctr_orig, 3));
        inout4 == reverse_bytes_quad32(inc32lite(ctr_orig, 4));
        inout5 == reverse_bytes_quad32(inc32lite(ctr_orig, 5));
        buffer128_read(scratch_b, 1, heap3) == Mkfour(0, 0, 0, 0);
{
    load_one_msb();     // # borrow $T2, .Lone_msb
    VPxor(Z0, Z0, Z0);  // # $Z0 = 0
    lemma_quad32_xor();
    Load128_buffer(heap0, rndkey, key, 0x00-0x80, Secret, keys_b, 0);
    VPaddd(inout1, T1, T2);
    lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout1, 1);
    VPaddd(inout2, inout1, T2);
    lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout2, 2);
    VPaddd(inout3, inout2, T2);
    lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout3, 3);
    VPaddd(inout4, inout3, T2);
    lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout4, 4);
    VPaddd(inout5, inout4, T2);
    lemma_incr_msb(inc32lite(ctr_orig, 0), old(T1), inout5, 5);
    VPxor(inout0, T1, rndkey);
    Store128_buffer(heap3, rsp, Z0, 0x10, Secret, scratch_b, 1); // # "$Z3" = 0
}
#reset-options ""
procedure Encrypt_save_and_shuffle_output(ghost count:nat, ghost out_b:buffer128)
    {:quick}
    lets
        outp @= rsi;
        Ii @= xmm0; T1 @= xmm1; Z3 @= xmm7;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11;
        inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
    reads
        outp; Ii; Z3; memLayout;
    modifies
        T1; inout0; inout1; inout2; inout3; inout4; inout5;
        heap0; efl;
    requires
        // Valid ptrs and buffers
        validDstAddrsOffset128(heap6, outp - 0x60, out_b, count*6, 6, memLayout, Secret);
        Ii == Mkfour(0x0C0D0E0F, 0x08090A0B, 0x04050607, 0x00010203);
    ensures
        modifies_buffer_specific128(out_b, old(heap6), heap6, count*6+0, count*6+5);
        slice(buffer128_as_seq(heap6, out_b), 0, 6 * count) == old(slice(buffer128_as_seq(heap6, out_b), 0, 6 * count));
        buffer128_read(out_b, count*6 + 0, heap6) == old(inout0);
        buffer128_read(out_b, count*6 + 1, heap6) == old(inout1);
        buffer128_read(out_b, count*6 + 2, heap6) == old(inout2);
        buffer128_read(out_b, count*6 + 3, heap6) == old(inout3);
        buffer128_read(out_b, count*6 + 4, heap6) == old(inout4);
        buffer128_read(out_b, count*6 + 5, heap6) == old(inout5);
        T1 == quad32_xor(old(T1), Z3);
        inout0 == reverse_bytes_quad32(old(inout0));
        inout1 == reverse_bytes_quad32(old(inout1));
        inout2 == reverse_bytes_quad32(old(inout2));
        inout3 == reverse_bytes_quad32(old(inout3));
        inout4 == reverse_bytes_quad32(old(inout4));
        inout5 == reverse_bytes_quad32(old(inout5));
{
    Store128_buffer(heap6, outp, inout0, 0-0x60, Secret, out_b, count*6 + 0);
    VPshufb(inout0, inout0, Ii);
    VPxor(T1, T1, Z3);
    Store128_buffer(heap6, outp, inout1, 0-0x50, Secret, out_b, count*6 + 1);
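    // Store/shuffle interleaving: each ciphertext block is written out, then
    // the same register is byte-reversed in place with VPshufb so the
    // byteswapped copies can feed the subsequent GHASH computation
    // (presumably interleaved to overlap shuffle latency with the stores).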
    VPshufb(inout1, inout1, Ii);
    Store128_buffer(heap6, outp, inout2, 0-0x40, Secret, out_b, count*6 + 2);
    VPshufb(inout2, inout2, Ii);
    Store128_buffer(heap6, outp, inout3, 0-0x30, Secret, out_b, count*6 + 3);
    VPshufb(inout3, inout3, Ii);
    Store128_buffer(heap6, outp, inout4, 0-0x20, Secret, out_b, count*6 + 4);
    VPshufb(inout4, inout4, Ii);
    Store128_buffer(heap6, outp, inout5, 0-0x10, Secret, out_b, count*6 + 5);
    VPshufb(inout5, inout5, Ii);
}
#reset-options "--z3rlimit 30"
procedure AES_GCM_encrypt(
        inline alg:algorithm,
        ghost iv_b:buffer128,
        ghost in_b:buffer128,
        ghost out_b:buffer128,
        ghost scratch_b:buffer128,
        ghost key_words:seq(nat32),
        ghost round_keys:seq(quad32),
        ghost keys_b:buffer128)
    returns(
        ghost ctr_BE:quad32)
    {:quick}
    lets
        // inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8; Xip @= r9;
        inp @= rdi; outp @= rsi; len @= rdx; key @= rcx; ivp @= r8;
        Ii @= xmm0; T1 @= xmm1; T2 @= xmm2; Hkey @= xmm3;
        Z0 @= xmm4; Z1 @= xmm5; Z2 @= xmm6; Z3 @= xmm7; Xi @= xmm8;
        inout0 @= xmm9; inout1 @= xmm10; inout2 @= xmm11;
        inout3 @= xmm12; inout4 @= xmm13; inout5 @= xmm14;
        rndkey @= xmm15;
        // counter @= rbx; rounds @= rbp; ret @= r10; constp @= r11; in0 @= r14; end0 @= r15;
        counter @= rbx; constp @= r11; in0 @= r14;
    reads
        rax; rbp; r9; r10; r15; ivp; rsp; memLayout;
    modifies
        inp; outp; len; key; counter; constp; r12; r13; in0;
        Ii; T1; T2; Hkey; Z0; Z1; Z2; Z3; Xi; inout0; inout1; inout2; inout3; inout4; inout5; rndkey;
        heap0; efl;
    requires
        // Valid buffers and pointers
        validDstAddrs128(heap0, ivp, iv_b, 1, memLayout, Secret);
        validSrcAddrs128(heap6, inp, in_b, len, memLayout, Secret);
        validDstAddrs128(heap6, outp, out_b, len, memLayout, Secret);
        validDstAddrs128(heap3, rsp, scratch_b, 8, memLayout, Secret);
        buffers_disjoint128(iv_b, keys_b);
        buffers_disjoint128(iv_b, scratch_b);
        buffers_disjoint128(iv_b, in_b);
        buffers_disjoint128(iv_b, out_b);
        buffers_disjoint128(scratch_b, keys_b);
        buffers_disjoint128(scratch_b, in_b);
        buffers_disjoint128(scratch_b, out_b);
        buffers_disjoint128(out_b, keys_b);
        buffers_disjoint128(in_b, out_b) || in_b == out_b;
        inp + 0x10*len < pow2_64;
        outp + 0x10*len < pow2_64;
        buffer_length(in_b) == buffer_length(out_b);
        buffer_length(in_b) == len;
        4096 * (buffer_length(in_b)) * 16 < pow2_32;

        // AES reqs
        buffer_addr(keys_b, heap0) + 0x80 < pow2_64;
        aes_reqs0(alg, key_words, round_keys, keys_b, key + 0x80, heap0, memLayout);

        // Len is # of 128-bit blocks
        len % 6 == 0;
        len >= 0x10*3;
        len / 6 >= 3; // Should be implied by above... :(
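        // Quick arithmetic check of the clause above: len % 6 == 0 and
        // len >= 0x10*3 == 48 give len / 6 >= 8 >= 3, so it is indeed
        // redundant but stated explicitly for the prover's benefit. The next
        // clause bounds the largest counter increment used (12 blocks are
        // encrypted before the main loop, which looks ahead 6 blocks),
        // presumably so inc32lite never wraps past 2^32.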
        12 + len + 6 < pow2_32;
    ensures
        let iv_LE := old(buffer128_read(iv_b, 0, heap0));
        let iv_BE := reverse_bytes_quad32(iv_LE);
        ctr_BE == Mkfour(2, iv_BE.lo1, iv_BE.hi2, iv_BE.hi3);
        //is_gctr_plain_LE(le_seq_quad32_to_bytes(old(buffer128_as_seq(heap6, in_b)))) &&
        is_aes_key_LE(alg, key_words);
        le_seq_quad32_to_bytes(buffer128_as_seq(heap6, out_b)) ==
            gctr_encrypt_LE(ctr_BE, le_seq_quad32_to_bytes(old(buffer128_as_seq(heap6, in_b))), alg, key_words);
{
    let plain_quads := buffer128_as_seq(heap6, in_b);
    Load128_buffer(heap0, T1, ivp, 0, Secret, iv_b, 0); // # input counter value
    let iv_LE := T1;
    //assert iv_LE == old(buffer128_read(iv_b, 0, heap0));
    Add64(key, 0x80); // # size optimization
    // TODO: Replace all of this with a single PinsrdImm(T1, 0x02000000, 3, counter)
    InitPshufbMask(Ii, r12); // # borrow $Ii for .Lbswap_mask
    VPshufb(T1, T1, Ii);
    let iv_BE := T1;
    //assert iv_BE == reverse_bytes_quad32(iv_LE);
    // Directly set the counter in T1.lo0 (and counter) to 2,
    // rather than read the existing value as OpenSSL does
    PinsrdImm(T1, 2, 0, counter);
    //let ctr_BE := T1;
    ctr_BE := T1;
    //assert ctr_BE == Mkfour(2, iv_BE.lo1, iv_BE.hi2, iv_BE.hi3);
    VPshufb(T1, T1, Ii);
    Mov64(in0, outp);
    gctr_partial_opaque_init(alg, buffer128_as_seq(heap6, in_b), buffer128_as_seq(heap6, out_b), key_words, ctr_BE);
    AESNI_ctr32_6x(alg, 0, in_b, out_b, plain_quads, key_words, round_keys, keys_b, ctr_BE, ctr_BE);

    // # save bswapped output on stack
    VPshufb(Xi, inout0, Ii);
    VPshufb(T2, inout1, Ii);
    Store128_buffer(heap3, rsp, Xi, 0x70, Secret, scratch_b, 7);
    VPshufb(Z0, inout2, Ii);
    Store128_buffer(heap3, rsp, T2, 0x60, Secret, scratch_b, 6);
    VPshufb(Z1, inout3, Ii);
    Store128_buffer(heap3, rsp, Z0, 0x50, Secret, scratch_b, 5);
    VPshufb(Z2, inout4, Ii);
    Store128_buffer(heap3, rsp, Z1, 0x40, Secret, scratch_b, 4);
    VPshufb(Z3, inout5, Ii); // # passed to _aesni_ctr32_ghash_6x
    Store128_buffer(heap3, rsp, Z2, 0x30, Secret, scratch_b, 3);

    AESNI_ctr32_6x(alg, 1, in_b, out_b, plain_quads, key_words, round_keys, keys_b, inc32(ctr_BE, 6), ctr_BE);

    Sub64(len, 12);
    Ctr32_ghash_6_prelude(alg, scratch_b, key_words, round_keys, keys_b, inc32(ctr_BE, 12));
    Mov64(counter, 14);

    let mid_len := len;
    assert mid_len == old(len) - 12;
    Loop6x_loop(alg, 2, iv_b, out_b, in_b, out_b, scratch_b,
        old(buffer128_as_seq(heap6, in_b)),
        key_words, round_keys, keys_b, ctr_BE, inc32(ctr_BE, 12));

    let out_snapshot := buffer128_as_seq(heap6, out_b);
    InitPshufbMask(Ii, r12); // # borrow $Ii for .Lbswap_mask
    let offset_in := #nat((old(len) / 6) - 1);
    Encrypt_save_and_shuffle_output(offset_in, out_b);
    gctr_partial_opaque_ignores_postfix(alg, #nat32(12 + mid_len - 6),
        old(buffer128_as_seq(heap6, in_b)), old(buffer128_as_seq(heap6, in_b)),
        out_snapshot, buffer128_as_seq(heap6, out_b), key_words, ctr_BE);
    gctr_partial_extend6(alg, 12 + mid_len - 6, old(buffer128_as_seq(heap6, in_b)),
        buffer128_as_seq(heap6, out_b), key_words, ctr_BE);
    //assert gctr_partial(alg, old(len), old(buffer128_as_seq(heap6, in_b)), buffer128_as_seq(heap6, out_b), key_words, ctr_BE);
    gctr_partial_opaque_completed(alg, old(buffer128_as_seq(heap6, in_b)), buffer128_as_seq(heap6, out_b), key_words, ctr_BE);
    gctr_partial_to_full_basic(ctr_BE, old(buffer128_as_seq(heap6, in_b)), alg, key_words, buffer128_as_seq(heap6, out_b));
    //assert le_seq_quad32_to_bytes(buffer128_as_seq(heap6, out_b)) == gctr_encrypt_LE(ctr_BE, le_seq_quad32_to_bytes(old(buffer128_as_seq(heap6, in_b))), alg, key_words);
}
*/