; This file is generated from a similarly-named Perl script in the BoringSSL ; source tree. Do not edit by hand. %ifidn __OUTPUT_FORMAT__, win64 default rel %define XMMWORD %define YMMWORD %define ZMMWORD %ifdef BORINGSSL_PREFIX %include "boringssl_prefix_symbols_nasm.inc" %endif section .rdata rdata align=8 ALIGN 16 one: DQ 1,0 two: DQ 2,0 three: DQ 3,0 four: DQ 4,0 five: DQ 5,0 six: DQ 6,0 seven: DQ 7,0 eight: DQ 8,0 OR_MASK: DD 0x00000000,0x00000000,0x00000000,0x80000000 poly: DQ 0x1,0xc200000000000000 mask: DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d con1: DD 1,1,1,1 con2: DD 0x1b,0x1b,0x1b,0x1b con3: DB -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 and_mask: DD 0,0xffffffff,0xffffffff,0xffffffff section .text code align=64 ALIGN 16 GFMUL: vpclmulqdq xmm2,xmm0,xmm1,0x00 vpclmulqdq xmm5,xmm0,xmm1,0x11 vpclmulqdq xmm3,xmm0,xmm1,0x10 vpclmulqdq xmm4,xmm0,xmm1,0x01 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm3,8 vpsrldq xmm3,xmm3,8 vpxor xmm2,xmm2,xmm4 vpxor xmm5,xmm5,xmm3 vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10 vpshufd xmm4,xmm2,78 vpxor xmm2,xmm3,xmm4 vpclmulqdq xmm3,xmm2,XMMWORD[poly],0x10 vpshufd xmm4,xmm2,78 vpxor xmm2,xmm3,xmm4 vpxor xmm0,xmm2,xmm5 DB 0F3h,0C3h ;repret global aesgcmsiv_htable_init ALIGN 16 aesgcmsiv_htable_init: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesgcmsiv_htable_init: mov rdi,rcx mov rsi,rdx vmovdqa xmm0,XMMWORD[rsi] vmovdqa xmm1,xmm0 vmovdqa XMMWORD[rdi],xmm0 call GFMUL vmovdqa XMMWORD[16+rdi],xmm0 call GFMUL vmovdqa XMMWORD[32+rdi],xmm0 call GFMUL vmovdqa XMMWORD[48+rdi],xmm0 call GFMUL vmovdqa XMMWORD[64+rdi],xmm0 call GFMUL vmovdqa XMMWORD[80+rdi],xmm0 call GFMUL vmovdqa XMMWORD[96+rdi],xmm0 call GFMUL vmovdqa XMMWORD[112+rdi],xmm0 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesgcmsiv_htable_init: global aesgcmsiv_htable6_init ALIGN 16 aesgcmsiv_htable6_init: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesgcmsiv_htable6_init: mov rdi,rcx mov rsi,rdx vmovdqa xmm0,XMMWORD[rsi] vmovdqa xmm1,xmm0 vmovdqa XMMWORD[rdi],xmm0 call GFMUL vmovdqa XMMWORD[16+rdi],xmm0 call GFMUL vmovdqa XMMWORD[32+rdi],xmm0 call GFMUL vmovdqa XMMWORD[48+rdi],xmm0 call GFMUL vmovdqa XMMWORD[64+rdi],xmm0 call GFMUL vmovdqa XMMWORD[80+rdi],xmm0 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesgcmsiv_htable6_init: global aesgcmsiv_htable_polyval ALIGN 16 aesgcmsiv_htable_polyval: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesgcmsiv_htable_polyval: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 test rdx,rdx jnz NEAR $L$htable_polyval_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$htable_polyval_start: vzeroall mov r11,rdx and r11,127 jz NEAR $L$htable_polyval_no_prefix vpxor xmm9,xmm9,xmm9 vmovdqa xmm1,XMMWORD[rcx] sub rdx,r11 sub r11,16 vmovdqu xmm0,XMMWORD[rsi] vpxor xmm0,xmm0,xmm1 vpclmulqdq xmm5,xmm0,XMMWORD[r11*1+rdi],0x01 vpclmulqdq xmm3,xmm0,XMMWORD[r11*1+rdi],0x00 vpclmulqdq xmm4,xmm0,XMMWORD[r11*1+rdi],0x11 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10 vpxor xmm5,xmm5,xmm6 lea rsi,[16+rsi] test r11,r11 jnz NEAR $L$htable_polyval_prefix_loop jmp NEAR $L$htable_polyval_prefix_complete ALIGN 64 $L$htable_polyval_prefix_loop: sub r11,16 vmovdqu xmm0,XMMWORD[rsi] vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[r11*1+rdi],0x10 vpxor xmm5,xmm5,xmm6 test r11,r11 lea rsi,[16+rsi] jnz NEAR $L$htable_polyval_prefix_loop $L$htable_polyval_prefix_complete: vpsrldq xmm6,xmm5,8 vpslldq xmm5,xmm5,8 vpxor xmm9,xmm4,xmm6 vpxor xmm1,xmm3,xmm5 jmp NEAR $L$htable_polyval_main_loop $L$htable_polyval_no_prefix: vpxor xmm1,xmm1,xmm1 vmovdqa xmm9,XMMWORD[rcx] ALIGN 64 $L$htable_polyval_main_loop: sub rdx,0x80 jb NEAR $L$htable_polyval_out vmovdqu xmm0,XMMWORD[112+rsi] vpclmulqdq xmm5,xmm0,XMMWORD[rdi],0x01 vpclmulqdq xmm3,xmm0,XMMWORD[rdi],0x00 vpclmulqdq xmm4,xmm0,XMMWORD[rdi],0x11 vpclmulqdq xmm6,xmm0,XMMWORD[rdi],0x10 vpxor xmm5,xmm5,xmm6 vmovdqu xmm0,XMMWORD[96+rsi] vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[16+rdi],0x10 vpxor xmm5,xmm5,xmm6 vmovdqu xmm0,XMMWORD[80+rsi] vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10 vpalignr xmm1,xmm1,xmm1,8 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[32+rdi],0x10 vpxor xmm5,xmm5,xmm6 vpxor xmm1,xmm1,xmm7 vmovdqu xmm0,XMMWORD[64+rsi] vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[48+rdi],0x10 vpxor xmm5,xmm5,xmm6 vmovdqu xmm0,XMMWORD[48+rsi] vpclmulqdq xmm7,xmm1,XMMWORD[poly],0x10 vpalignr xmm1,xmm1,xmm1,8 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[64+rdi],0x10 vpxor xmm5,xmm5,xmm6 vpxor xmm1,xmm1,xmm7 vmovdqu xmm0,XMMWORD[32+rsi] vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[80+rdi],0x10 vpxor xmm5,xmm5,xmm6 vpxor xmm1,xmm1,xmm9 vmovdqu xmm0,XMMWORD[16+rsi] vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[96+rdi],0x10 vpxor xmm5,xmm5,xmm6 vmovdqu xmm0,XMMWORD[rsi] vpxor xmm0,xmm0,xmm1 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x01 vpxor xmm5,xmm5,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x00 vpxor xmm3,xmm3,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x11 vpxor xmm4,xmm4,xmm6 vpclmulqdq xmm6,xmm0,XMMWORD[112+rdi],0x10 vpxor xmm5,xmm5,xmm6 vpsrldq xmm6,xmm5,8 vpslldq xmm5,xmm5,8 vpxor xmm9,xmm4,xmm6 vpxor xmm1,xmm3,xmm5 lea rsi,[128+rsi] jmp NEAR $L$htable_polyval_main_loop $L$htable_polyval_out: vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10 vpalignr xmm1,xmm1,xmm1,8 vpxor xmm1,xmm1,xmm6 vpclmulqdq xmm6,xmm1,XMMWORD[poly],0x10 vpalignr xmm1,xmm1,xmm1,8 vpxor xmm1,xmm1,xmm6 vpxor xmm1,xmm1,xmm9 vmovdqu XMMWORD[rcx],xmm1 vzeroupper mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesgcmsiv_htable_polyval: global aesgcmsiv_polyval_horner ALIGN 16 aesgcmsiv_polyval_horner: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesgcmsiv_polyval_horner: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 test rcx,rcx jnz NEAR $L$polyval_horner_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$polyval_horner_start: xor r10,r10 shl rcx,4 vmovdqa xmm1,XMMWORD[rsi] vmovdqa xmm0,XMMWORD[rdi] $L$polyval_horner_loop: vpxor xmm0,xmm0,XMMWORD[r10*1+rdx] call GFMUL add r10,16 cmp rcx,r10 jne NEAR $L$polyval_horner_loop vmovdqa XMMWORD[rdi],xmm0 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesgcmsiv_polyval_horner: global aes128gcmsiv_aes_ks ALIGN 16 aes128gcmsiv_aes_ks: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_aes_ks: mov rdi,rcx mov rsi,rdx vmovdqu xmm1,XMMWORD[rdi] vmovdqa XMMWORD[rsi],xmm1 vmovdqa xmm0,XMMWORD[con1] vmovdqa xmm15,XMMWORD[mask] mov rax,8 $L$ks128_loop: add rsi,16 sub rax,1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm3,xmm1,4 vpxor xmm1,xmm1,xmm3 vpslldq xmm3,xmm3,4 vpxor xmm1,xmm1,xmm3 vpslldq xmm3,xmm3,4 vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vmovdqa XMMWORD[rsi],xmm1 jne NEAR $L$ks128_loop vmovdqa xmm0,XMMWORD[con2] vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm3,xmm1,4 vpxor xmm1,xmm1,xmm3 vpslldq xmm3,xmm3,4 vpxor xmm1,xmm1,xmm3 vpslldq xmm3,xmm3,4 vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vmovdqa XMMWORD[16+rsi],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslldq xmm3,xmm1,4 vpxor xmm1,xmm1,xmm3 vpslldq xmm3,xmm3,4 vpxor xmm1,xmm1,xmm3 vpslldq xmm3,xmm3,4 vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vmovdqa XMMWORD[32+rsi],xmm1 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_aes_ks: global aes256gcmsiv_aes_ks ALIGN 16 aes256gcmsiv_aes_ks: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_aes_ks: mov rdi,rcx mov rsi,rdx vmovdqu xmm1,XMMWORD[rdi] vmovdqu xmm3,XMMWORD[16+rdi] vmovdqa XMMWORD[rsi],xmm1 vmovdqa XMMWORD[16+rsi],xmm3 vmovdqa xmm0,XMMWORD[con1] vmovdqa xmm15,XMMWORD[mask] vpxor xmm14,xmm14,xmm14 mov rax,6 $L$ks256_loop: add rsi,32 sub rax,1 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm4,xmm1,32 vpxor xmm1,xmm1,xmm4 vpshufb xmm4,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vmovdqa XMMWORD[rsi],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpsllq xmm4,xmm3,32 vpxor xmm3,xmm3,xmm4 vpshufb xmm4,xmm3,XMMWORD[con3] vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vmovdqa XMMWORD[16+rsi],xmm3 jne NEAR $L$ks256_loop vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpsllq xmm4,xmm1,32 vpxor xmm1,xmm1,xmm4 vpshufb xmm4,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vmovdqa XMMWORD[32+rsi],xmm1 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret global aes128gcmsiv_aes_ks_enc_x1 ALIGN 16 aes128gcmsiv_aes_ks_enc_x1: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 vmovdqa xmm1,XMMWORD[rcx] vmovdqa xmm4,XMMWORD[rdi] vmovdqa XMMWORD[rdx],xmm1 vpxor xmm4,xmm4,xmm1 vmovdqa xmm0,XMMWORD[con1] vmovdqa xmm15,XMMWORD[mask] vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[16+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[32+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[48+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[64+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[80+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[96+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[112+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[128+rdx],xmm1 vmovdqa xmm0,XMMWORD[con2] vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenc xmm4,xmm4,xmm1 vmovdqa XMMWORD[144+rdx],xmm1 vpshufb xmm2,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm0 vpsllq xmm3,xmm1,32 vpxor xmm1,xmm1,xmm3 vpshufb xmm3,xmm1,XMMWORD[con3] vpxor xmm1,xmm1,xmm3 vpxor xmm1,xmm1,xmm2 vaesenclast xmm4,xmm4,xmm1 vmovdqa XMMWORD[160+rdx],xmm1 vmovdqa XMMWORD[rsi],xmm4 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_aes_ks_enc_x1: global aes128gcmsiv_kdf ALIGN 16 aes128gcmsiv_kdf: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_kdf: mov rdi,rcx mov rsi,rdx mov rdx,r8 vmovdqa xmm1,XMMWORD[rdx] vmovdqa xmm9,XMMWORD[rdi] vmovdqa xmm12,XMMWORD[and_mask] vmovdqa xmm13,XMMWORD[one] vpshufd xmm9,xmm9,0x90 vpand xmm9,xmm9,xmm12 vpaddd xmm10,xmm9,xmm13 vpaddd xmm11,xmm10,xmm13 vpaddd xmm12,xmm11,xmm13 vpxor xmm9,xmm9,xmm1 vpxor xmm10,xmm10,xmm1 vpxor xmm11,xmm11,xmm1 vpxor xmm12,xmm12,xmm1 vmovdqa xmm1,XMMWORD[16+rdx] vaesenc xmm9,xmm9,xmm1 vaesenc xmm10,xmm10,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vmovdqa xmm2,XMMWORD[32+rdx] vaesenc xmm9,xmm9,xmm2 vaesenc xmm10,xmm10,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vmovdqa xmm1,XMMWORD[48+rdx] vaesenc xmm9,xmm9,xmm1 vaesenc xmm10,xmm10,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vmovdqa xmm2,XMMWORD[64+rdx] vaesenc xmm9,xmm9,xmm2 vaesenc xmm10,xmm10,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vmovdqa xmm1,XMMWORD[80+rdx] vaesenc xmm9,xmm9,xmm1 vaesenc xmm10,xmm10,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vmovdqa xmm2,XMMWORD[96+rdx] vaesenc xmm9,xmm9,xmm2 vaesenc xmm10,xmm10,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vmovdqa xmm1,XMMWORD[112+rdx] vaesenc xmm9,xmm9,xmm1 vaesenc xmm10,xmm10,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vmovdqa xmm2,XMMWORD[128+rdx] vaesenc xmm9,xmm9,xmm2 vaesenc xmm10,xmm10,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vmovdqa xmm1,XMMWORD[144+rdx] vaesenc xmm9,xmm9,xmm1 vaesenc xmm10,xmm10,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vmovdqa xmm2,XMMWORD[160+rdx] vaesenclast xmm9,xmm9,xmm2 vaesenclast xmm10,xmm10,xmm2 vaesenclast xmm11,xmm11,xmm2 vaesenclast xmm12,xmm12,xmm2 vmovdqa XMMWORD[rsi],xmm9 vmovdqa XMMWORD[16+rsi],xmm10 vmovdqa XMMWORD[32+rsi],xmm11 vmovdqa XMMWORD[48+rsi],xmm12 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_kdf: global aes128gcmsiv_enc_msg_x4 ALIGN 16 aes128gcmsiv_enc_msg_x4: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_enc_msg_x4: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] test r8,r8 jnz NEAR $L$128_enc_msg_x4_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$128_enc_msg_x4_start: push r12 push r13 shr r8,4 mov r10,r8 shl r10,62 shr r10,62 vmovdqa xmm15,XMMWORD[rdx] vpor xmm15,xmm15,XMMWORD[OR_MASK] vmovdqu xmm4,XMMWORD[four] vmovdqa xmm0,xmm15 vpaddd xmm1,xmm15,XMMWORD[one] vpaddd xmm2,xmm15,XMMWORD[two] vpaddd xmm3,xmm15,XMMWORD[three] shr r8,2 je NEAR $L$128_enc_msg_x4_check_remainder sub rsi,64 sub rdi,64 $L$128_enc_msg_x4_loop1: add rsi,64 add rdi,64 vmovdqa xmm5,xmm0 vmovdqa xmm6,xmm1 vmovdqa xmm7,xmm2 vmovdqa xmm8,xmm3 vpxor xmm5,xmm5,XMMWORD[rcx] vpxor xmm6,xmm6,XMMWORD[rcx] vpxor xmm7,xmm7,XMMWORD[rcx] vpxor xmm8,xmm8,XMMWORD[rcx] vmovdqu xmm12,XMMWORD[16+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm0,xmm0,xmm4 vmovdqu xmm12,XMMWORD[32+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm1,xmm1,xmm4 vmovdqu xmm12,XMMWORD[48+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm2,xmm2,xmm4 vmovdqu xmm12,XMMWORD[64+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm3,xmm3,xmm4 vmovdqu xmm12,XMMWORD[80+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[96+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[112+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[128+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[144+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[160+rcx] vaesenclast xmm5,xmm5,xmm12 vaesenclast xmm6,xmm6,xmm12 vaesenclast xmm7,xmm7,xmm12 vaesenclast xmm8,xmm8,xmm12 vpxor xmm5,xmm5,XMMWORD[rdi] vpxor xmm6,xmm6,XMMWORD[16+rdi] vpxor xmm7,xmm7,XMMWORD[32+rdi] vpxor xmm8,xmm8,XMMWORD[48+rdi] sub r8,1 vmovdqu XMMWORD[rsi],xmm5 vmovdqu XMMWORD[16+rsi],xmm6 vmovdqu XMMWORD[32+rsi],xmm7 vmovdqu XMMWORD[48+rsi],xmm8 jne NEAR $L$128_enc_msg_x4_loop1 add rsi,64 add rdi,64 $L$128_enc_msg_x4_check_remainder: cmp r10,0 je NEAR $L$128_enc_msg_x4_out $L$128_enc_msg_x4_loop2: vmovdqa xmm5,xmm0 vpaddd xmm0,xmm0,XMMWORD[one] vpxor xmm5,xmm5,XMMWORD[rcx] vaesenc xmm5,xmm5,XMMWORD[16+rcx] vaesenc xmm5,xmm5,XMMWORD[32+rcx] vaesenc xmm5,xmm5,XMMWORD[48+rcx] vaesenc xmm5,xmm5,XMMWORD[64+rcx] vaesenc xmm5,xmm5,XMMWORD[80+rcx] vaesenc xmm5,xmm5,XMMWORD[96+rcx] vaesenc xmm5,xmm5,XMMWORD[112+rcx] vaesenc xmm5,xmm5,XMMWORD[128+rcx] vaesenc xmm5,xmm5,XMMWORD[144+rcx] vaesenclast xmm5,xmm5,XMMWORD[160+rcx] vpxor xmm5,xmm5,XMMWORD[rdi] vmovdqu XMMWORD[rsi],xmm5 add rdi,16 add rsi,16 sub r10,1 jne NEAR $L$128_enc_msg_x4_loop2 $L$128_enc_msg_x4_out: pop r13 pop r12 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_enc_msg_x4: global aes128gcmsiv_enc_msg_x8 ALIGN 16 aes128gcmsiv_enc_msg_x8: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_enc_msg_x8: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] test r8,r8 jnz NEAR $L$128_enc_msg_x8_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$128_enc_msg_x8_start: push r12 push r13 push rbp mov rbp,rsp sub rsp,128 and rsp,-64 shr r8,4 mov r10,r8 shl r10,61 shr r10,61 vmovdqu xmm1,XMMWORD[rdx] vpor xmm1,xmm1,XMMWORD[OR_MASK] vpaddd xmm0,xmm1,XMMWORD[seven] vmovdqu XMMWORD[rsp],xmm0 vpaddd xmm9,xmm1,XMMWORD[one] vpaddd xmm10,xmm1,XMMWORD[two] vpaddd xmm11,xmm1,XMMWORD[three] vpaddd xmm12,xmm1,XMMWORD[four] vpaddd xmm13,xmm1,XMMWORD[five] vpaddd xmm14,xmm1,XMMWORD[six] vmovdqa xmm0,xmm1 shr r8,3 je NEAR $L$128_enc_msg_x8_check_remainder sub rsi,128 sub rdi,128 $L$128_enc_msg_x8_loop1: add rsi,128 add rdi,128 vmovdqa xmm1,xmm0 vmovdqa xmm2,xmm9 vmovdqa xmm3,xmm10 vmovdqa xmm4,xmm11 vmovdqa xmm5,xmm12 vmovdqa xmm6,xmm13 vmovdqa xmm7,xmm14 vmovdqu xmm8,XMMWORD[rsp] vpxor xmm1,xmm1,XMMWORD[rcx] vpxor xmm2,xmm2,XMMWORD[rcx] vpxor xmm3,xmm3,XMMWORD[rcx] vpxor xmm4,xmm4,XMMWORD[rcx] vpxor xmm5,xmm5,XMMWORD[rcx] vpxor xmm6,xmm6,XMMWORD[rcx] vpxor xmm7,xmm7,XMMWORD[rcx] vpxor xmm8,xmm8,XMMWORD[rcx] vmovdqu xmm15,XMMWORD[16+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm14,XMMWORD[rsp] vpaddd xmm14,xmm14,XMMWORD[eight] vmovdqu XMMWORD[rsp],xmm14 vmovdqu xmm15,XMMWORD[32+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpsubd xmm14,xmm14,XMMWORD[one] vmovdqu xmm15,XMMWORD[48+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm0,xmm0,XMMWORD[eight] vmovdqu xmm15,XMMWORD[64+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm9,xmm9,XMMWORD[eight] vmovdqu xmm15,XMMWORD[80+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm10,xmm10,XMMWORD[eight] vmovdqu xmm15,XMMWORD[96+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm11,xmm11,XMMWORD[eight] vmovdqu xmm15,XMMWORD[112+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm12,xmm12,XMMWORD[eight] vmovdqu xmm15,XMMWORD[128+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm13,xmm13,XMMWORD[eight] vmovdqu xmm15,XMMWORD[144+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm15,XMMWORD[160+rcx] vaesenclast xmm1,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm15 vaesenclast xmm3,xmm3,xmm15 vaesenclast xmm4,xmm4,xmm15 vaesenclast xmm5,xmm5,xmm15 vaesenclast xmm6,xmm6,xmm15 vaesenclast xmm7,xmm7,xmm15 vaesenclast xmm8,xmm8,xmm15 vpxor xmm1,xmm1,XMMWORD[rdi] vpxor xmm2,xmm2,XMMWORD[16+rdi] vpxor xmm3,xmm3,XMMWORD[32+rdi] vpxor xmm4,xmm4,XMMWORD[48+rdi] vpxor xmm5,xmm5,XMMWORD[64+rdi] vpxor xmm6,xmm6,XMMWORD[80+rdi] vpxor xmm7,xmm7,XMMWORD[96+rdi] vpxor xmm8,xmm8,XMMWORD[112+rdi] dec r8 vmovdqu XMMWORD[rsi],xmm1 vmovdqu XMMWORD[16+rsi],xmm2 vmovdqu XMMWORD[32+rsi],xmm3 vmovdqu XMMWORD[48+rsi],xmm4 vmovdqu XMMWORD[64+rsi],xmm5 vmovdqu XMMWORD[80+rsi],xmm6 vmovdqu XMMWORD[96+rsi],xmm7 vmovdqu XMMWORD[112+rsi],xmm8 jne NEAR $L$128_enc_msg_x8_loop1 add rsi,128 add rdi,128 $L$128_enc_msg_x8_check_remainder: cmp r10,0 je NEAR $L$128_enc_msg_x8_out $L$128_enc_msg_x8_loop2: vmovdqa xmm1,xmm0 vpaddd xmm0,xmm0,XMMWORD[one] vpxor xmm1,xmm1,XMMWORD[rcx] vaesenc xmm1,xmm1,XMMWORD[16+rcx] vaesenc xmm1,xmm1,XMMWORD[32+rcx] vaesenc xmm1,xmm1,XMMWORD[48+rcx] vaesenc xmm1,xmm1,XMMWORD[64+rcx] vaesenc xmm1,xmm1,XMMWORD[80+rcx] vaesenc xmm1,xmm1,XMMWORD[96+rcx] vaesenc xmm1,xmm1,XMMWORD[112+rcx] vaesenc xmm1,xmm1,XMMWORD[128+rcx] vaesenc xmm1,xmm1,XMMWORD[144+rcx] vaesenclast xmm1,xmm1,XMMWORD[160+rcx] vpxor xmm1,xmm1,XMMWORD[rdi] vmovdqu XMMWORD[rsi],xmm1 add rdi,16 add rsi,16 dec r10 jne NEAR $L$128_enc_msg_x8_loop2 $L$128_enc_msg_x8_out: mov rsp,rbp pop rbp pop r13 pop r12 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_enc_msg_x8: global aes128gcmsiv_dec ALIGN 16 aes128gcmsiv_dec: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_dec: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] mov r9,QWORD[48+rsp] test r9,~15 jnz NEAR $L$128_dec_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$128_dec_start: vzeroupper vmovdqa xmm0,XMMWORD[rdx] mov rax,rdx lea rax,[32+rax] lea rcx,[32+rcx] vmovdqu xmm15,XMMWORD[r9*1+rdi] vpor xmm15,xmm15,XMMWORD[OR_MASK] and r9,~15 cmp r9,96 jb NEAR $L$128_dec_loop2 sub r9,96 vmovdqa xmm7,xmm15 vpaddd xmm8,xmm7,XMMWORD[one] vpaddd xmm9,xmm7,XMMWORD[two] vpaddd xmm10,xmm9,XMMWORD[one] vpaddd xmm11,xmm9,XMMWORD[two] vpaddd xmm12,xmm11,XMMWORD[one] vpaddd xmm15,xmm11,XMMWORD[two] vpxor xmm7,xmm7,XMMWORD[r8] vpxor xmm8,xmm8,XMMWORD[r8] vpxor xmm9,xmm9,XMMWORD[r8] vpxor xmm10,xmm10,XMMWORD[r8] vpxor xmm11,xmm11,XMMWORD[r8] vpxor xmm12,xmm12,XMMWORD[r8] vmovdqu xmm4,XMMWORD[16+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[32+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[48+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[64+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[80+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[96+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[112+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[128+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[144+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[160+r8] vaesenclast xmm7,xmm7,xmm4 vaesenclast xmm8,xmm8,xmm4 vaesenclast xmm9,xmm9,xmm4 vaesenclast xmm10,xmm10,xmm4 vaesenclast xmm11,xmm11,xmm4 vaesenclast xmm12,xmm12,xmm4 vpxor xmm7,xmm7,XMMWORD[rdi] vpxor xmm8,xmm8,XMMWORD[16+rdi] vpxor xmm9,xmm9,XMMWORD[32+rdi] vpxor xmm10,xmm10,XMMWORD[48+rdi] vpxor xmm11,xmm11,XMMWORD[64+rdi] vpxor xmm12,xmm12,XMMWORD[80+rdi] vmovdqu XMMWORD[rsi],xmm7 vmovdqu XMMWORD[16+rsi],xmm8 vmovdqu XMMWORD[32+rsi],xmm9 vmovdqu XMMWORD[48+rsi],xmm10 vmovdqu XMMWORD[64+rsi],xmm11 vmovdqu XMMWORD[80+rsi],xmm12 add rdi,96 add rsi,96 jmp NEAR $L$128_dec_loop1 ALIGN 64 $L$128_dec_loop1: cmp r9,96 jb NEAR $L$128_dec_finish_96 sub r9,96 vmovdqa xmm6,xmm12 vmovdqa XMMWORD[(16-32)+rax],xmm11 vmovdqa XMMWORD[(32-32)+rax],xmm10 vmovdqa XMMWORD[(48-32)+rax],xmm9 vmovdqa XMMWORD[(64-32)+rax],xmm8 vmovdqa XMMWORD[(80-32)+rax],xmm7 vmovdqa xmm7,xmm15 vpaddd xmm8,xmm7,XMMWORD[one] vpaddd xmm9,xmm7,XMMWORD[two] vpaddd xmm10,xmm9,XMMWORD[one] vpaddd xmm11,xmm9,XMMWORD[two] vpaddd xmm12,xmm11,XMMWORD[one] vpaddd xmm15,xmm11,XMMWORD[two] vmovdqa xmm4,XMMWORD[r8] vpxor xmm7,xmm7,xmm4 vpxor xmm8,xmm8,xmm4 vpxor xmm9,xmm9,xmm4 vpxor xmm10,xmm10,xmm4 vpxor xmm11,xmm11,xmm4 vpxor xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[((0-32))+rcx] vpclmulqdq xmm2,xmm6,xmm4,0x11 vpclmulqdq xmm3,xmm6,xmm4,0x00 vpclmulqdq xmm1,xmm6,xmm4,0x01 vpclmulqdq xmm4,xmm6,xmm4,0x10 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[16+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[((-16))+rax] vmovdqu xmm13,XMMWORD[((-16))+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[32+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[rax] vmovdqu xmm13,XMMWORD[rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[48+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[16+rax] vmovdqu xmm13,XMMWORD[16+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[64+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[32+rax] vmovdqu xmm13,XMMWORD[32+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[80+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[96+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[112+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqa xmm6,XMMWORD[((80-32))+rax] vpxor xmm6,xmm6,xmm0 vmovdqu xmm5,XMMWORD[((80-32))+rcx] vpclmulqdq xmm4,xmm6,xmm5,0x01 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x10 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[128+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vpsrldq xmm4,xmm1,8 vpxor xmm5,xmm2,xmm4 vpslldq xmm4,xmm1,8 vpxor xmm0,xmm3,xmm4 vmovdqa xmm3,XMMWORD[poly] vmovdqu xmm4,XMMWORD[144+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[160+r8] vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vpxor xmm4,xmm6,XMMWORD[rdi] vaesenclast xmm7,xmm7,xmm4 vpxor xmm4,xmm6,XMMWORD[16+rdi] vaesenclast xmm8,xmm8,xmm4 vpxor xmm4,xmm6,XMMWORD[32+rdi] vaesenclast xmm9,xmm9,xmm4 vpxor xmm4,xmm6,XMMWORD[48+rdi] vaesenclast xmm10,xmm10,xmm4 vpxor xmm4,xmm6,XMMWORD[64+rdi] vaesenclast xmm11,xmm11,xmm4 vpxor xmm4,xmm6,XMMWORD[80+rdi] vaesenclast xmm12,xmm12,xmm4 vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vmovdqu XMMWORD[rsi],xmm7 vmovdqu XMMWORD[16+rsi],xmm8 vmovdqu XMMWORD[32+rsi],xmm9 vmovdqu XMMWORD[48+rsi],xmm10 vmovdqu XMMWORD[64+rsi],xmm11 vmovdqu XMMWORD[80+rsi],xmm12 vpxor xmm0,xmm0,xmm5 lea rdi,[96+rdi] lea rsi,[96+rsi] jmp NEAR $L$128_dec_loop1 $L$128_dec_finish_96: vmovdqa xmm6,xmm12 vmovdqa XMMWORD[(16-32)+rax],xmm11 vmovdqa XMMWORD[(32-32)+rax],xmm10 vmovdqa XMMWORD[(48-32)+rax],xmm9 vmovdqa XMMWORD[(64-32)+rax],xmm8 vmovdqa XMMWORD[(80-32)+rax],xmm7 vmovdqu xmm4,XMMWORD[((0-32))+rcx] vpclmulqdq xmm1,xmm6,xmm4,0x10 vpclmulqdq xmm2,xmm6,xmm4,0x11 vpclmulqdq xmm3,xmm6,xmm4,0x00 vpclmulqdq xmm4,xmm6,xmm4,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[((-16))+rax] vmovdqu xmm13,XMMWORD[((-16))+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[rax] vmovdqu xmm13,XMMWORD[rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[16+rax] vmovdqu xmm13,XMMWORD[16+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[32+rax] vmovdqu xmm13,XMMWORD[32+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[((80-32))+rax] vpxor xmm6,xmm6,xmm0 vmovdqu xmm5,XMMWORD[((80-32))+rcx] vpclmulqdq xmm4,xmm6,xmm5,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x01 vpxor xmm1,xmm1,xmm4 vpsrldq xmm4,xmm1,8 vpxor xmm5,xmm2,xmm4 vpslldq xmm4,xmm1,8 vpxor xmm0,xmm3,xmm4 vmovdqa xmm3,XMMWORD[poly] vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vpxor xmm0,xmm0,xmm5 $L$128_dec_loop2: cmp r9,16 jb NEAR $L$128_dec_out sub r9,16 vmovdqa xmm2,xmm15 vpaddd xmm15,xmm15,XMMWORD[one] vpxor xmm2,xmm2,XMMWORD[r8] vaesenc xmm2,xmm2,XMMWORD[16+r8] vaesenc xmm2,xmm2,XMMWORD[32+r8] vaesenc xmm2,xmm2,XMMWORD[48+r8] vaesenc xmm2,xmm2,XMMWORD[64+r8] vaesenc xmm2,xmm2,XMMWORD[80+r8] vaesenc xmm2,xmm2,XMMWORD[96+r8] vaesenc xmm2,xmm2,XMMWORD[112+r8] vaesenc xmm2,xmm2,XMMWORD[128+r8] vaesenc xmm2,xmm2,XMMWORD[144+r8] vaesenclast xmm2,xmm2,XMMWORD[160+r8] vpxor xmm2,xmm2,XMMWORD[rdi] vmovdqu XMMWORD[rsi],xmm2 add rdi,16 add rsi,16 vpxor xmm0,xmm0,xmm2 vmovdqa xmm1,XMMWORD[((-32))+rcx] call GFMUL jmp NEAR $L$128_dec_loop2 $L$128_dec_out: vmovdqu XMMWORD[rdx],xmm0 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_dec: global aes128gcmsiv_ecb_enc_block ALIGN 16 aes128gcmsiv_ecb_enc_block: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes128gcmsiv_ecb_enc_block: mov rdi,rcx mov rsi,rdx mov rdx,r8 vmovdqa xmm1,XMMWORD[rdi] vpxor xmm1,xmm1,XMMWORD[rdx] vaesenc xmm1,xmm1,XMMWORD[16+rdx] vaesenc xmm1,xmm1,XMMWORD[32+rdx] vaesenc xmm1,xmm1,XMMWORD[48+rdx] vaesenc xmm1,xmm1,XMMWORD[64+rdx] vaesenc xmm1,xmm1,XMMWORD[80+rdx] vaesenc xmm1,xmm1,XMMWORD[96+rdx] vaesenc xmm1,xmm1,XMMWORD[112+rdx] vaesenc xmm1,xmm1,XMMWORD[128+rdx] vaesenc xmm1,xmm1,XMMWORD[144+rdx] vaesenclast xmm1,xmm1,XMMWORD[160+rdx] vmovdqa XMMWORD[rsi],xmm1 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes128gcmsiv_ecb_enc_block: global aes256gcmsiv_aes_ks_enc_x1 ALIGN 16 aes256gcmsiv_aes_ks_enc_x1: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 vmovdqa xmm0,XMMWORD[con1] vmovdqa xmm15,XMMWORD[mask] vmovdqa xmm8,XMMWORD[rdi] vmovdqa xmm1,XMMWORD[rcx] vmovdqa xmm3,XMMWORD[16+rcx] vpxor xmm8,xmm8,xmm1 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[rdx],xmm1 vmovdqu XMMWORD[16+rdx],xmm3 vpxor xmm14,xmm14,xmm14 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenc xmm8,xmm8,xmm1 vmovdqu XMMWORD[32+rdx],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpslldq xmm4,xmm3,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[48+rdx],xmm3 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenc xmm8,xmm8,xmm1 vmovdqu XMMWORD[64+rdx],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpslldq xmm4,xmm3,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[80+rdx],xmm3 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenc xmm8,xmm8,xmm1 vmovdqu XMMWORD[96+rdx],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpslldq xmm4,xmm3,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[112+rdx],xmm3 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenc xmm8,xmm8,xmm1 vmovdqu XMMWORD[128+rdx],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpslldq xmm4,xmm3,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[144+rdx],xmm3 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenc xmm8,xmm8,xmm1 vmovdqu XMMWORD[160+rdx],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpslldq xmm4,xmm3,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[176+rdx],xmm3 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslld xmm0,xmm0,1 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenc xmm8,xmm8,xmm1 vmovdqu XMMWORD[192+rdx],xmm1 vpshufd xmm2,xmm1,0xff vaesenclast xmm2,xmm2,xmm14 vpslldq xmm4,xmm3,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm3,xmm3,xmm4 vpxor xmm3,xmm3,xmm2 vaesenc xmm8,xmm8,xmm3 vmovdqu XMMWORD[208+rdx],xmm3 vpshufb xmm2,xmm3,xmm15 vaesenclast xmm2,xmm2,xmm0 vpslldq xmm4,xmm1,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpslldq xmm4,xmm4,4 vpxor xmm1,xmm1,xmm4 vpxor xmm1,xmm1,xmm2 vaesenclast xmm8,xmm8,xmm1 vmovdqu XMMWORD[224+rdx],xmm1 vmovdqa XMMWORD[rsi],xmm8 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes256gcmsiv_aes_ks_enc_x1: global aes256gcmsiv_ecb_enc_block ALIGN 16 aes256gcmsiv_ecb_enc_block: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_ecb_enc_block: mov rdi,rcx mov rsi,rdx mov rdx,r8 vmovdqa xmm1,XMMWORD[rdi] vpxor xmm1,xmm1,XMMWORD[rdx] vaesenc xmm1,xmm1,XMMWORD[16+rdx] vaesenc xmm1,xmm1,XMMWORD[32+rdx] vaesenc xmm1,xmm1,XMMWORD[48+rdx] vaesenc xmm1,xmm1,XMMWORD[64+rdx] vaesenc xmm1,xmm1,XMMWORD[80+rdx] vaesenc xmm1,xmm1,XMMWORD[96+rdx] vaesenc xmm1,xmm1,XMMWORD[112+rdx] vaesenc xmm1,xmm1,XMMWORD[128+rdx] vaesenc xmm1,xmm1,XMMWORD[144+rdx] vaesenc xmm1,xmm1,XMMWORD[160+rdx] vaesenc xmm1,xmm1,XMMWORD[176+rdx] vaesenc xmm1,xmm1,XMMWORD[192+rdx] vaesenc xmm1,xmm1,XMMWORD[208+rdx] vaesenclast xmm1,xmm1,XMMWORD[224+rdx] vmovdqa XMMWORD[rsi],xmm1 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes256gcmsiv_ecb_enc_block: global aes256gcmsiv_enc_msg_x4 ALIGN 16 aes256gcmsiv_enc_msg_x4: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_enc_msg_x4: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] test r8,r8 jnz NEAR $L$256_enc_msg_x4_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$256_enc_msg_x4_start: mov r10,r8 shr r8,4 shl r10,60 jz NEAR $L$256_enc_msg_x4_start2 add r8,1 $L$256_enc_msg_x4_start2: mov r10,r8 shl r10,62 shr r10,62 vmovdqa xmm15,XMMWORD[rdx] vpor xmm15,xmm15,XMMWORD[OR_MASK] vmovdqa xmm4,XMMWORD[four] vmovdqa xmm0,xmm15 vpaddd xmm1,xmm15,XMMWORD[one] vpaddd xmm2,xmm15,XMMWORD[two] vpaddd xmm3,xmm15,XMMWORD[three] shr r8,2 je NEAR $L$256_enc_msg_x4_check_remainder sub rsi,64 sub rdi,64 $L$256_enc_msg_x4_loop1: add rsi,64 add rdi,64 vmovdqa xmm5,xmm0 vmovdqa xmm6,xmm1 vmovdqa xmm7,xmm2 vmovdqa xmm8,xmm3 vpxor xmm5,xmm5,XMMWORD[rcx] vpxor xmm6,xmm6,XMMWORD[rcx] vpxor xmm7,xmm7,XMMWORD[rcx] vpxor xmm8,xmm8,XMMWORD[rcx] vmovdqu xmm12,XMMWORD[16+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm0,xmm0,xmm4 vmovdqu xmm12,XMMWORD[32+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm1,xmm1,xmm4 vmovdqu xmm12,XMMWORD[48+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm2,xmm2,xmm4 vmovdqu xmm12,XMMWORD[64+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vpaddd xmm3,xmm3,xmm4 vmovdqu xmm12,XMMWORD[80+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[96+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[112+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[128+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[144+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[160+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[176+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[192+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[208+rcx] vaesenc xmm5,xmm5,xmm12 vaesenc xmm6,xmm6,xmm12 vaesenc xmm7,xmm7,xmm12 vaesenc xmm8,xmm8,xmm12 vmovdqu xmm12,XMMWORD[224+rcx] vaesenclast xmm5,xmm5,xmm12 vaesenclast xmm6,xmm6,xmm12 vaesenclast xmm7,xmm7,xmm12 vaesenclast xmm8,xmm8,xmm12 vpxor xmm5,xmm5,XMMWORD[rdi] vpxor xmm6,xmm6,XMMWORD[16+rdi] vpxor xmm7,xmm7,XMMWORD[32+rdi] vpxor xmm8,xmm8,XMMWORD[48+rdi] sub r8,1 vmovdqu XMMWORD[rsi],xmm5 vmovdqu XMMWORD[16+rsi],xmm6 vmovdqu XMMWORD[32+rsi],xmm7 vmovdqu XMMWORD[48+rsi],xmm8 jne NEAR $L$256_enc_msg_x4_loop1 add rsi,64 add rdi,64 $L$256_enc_msg_x4_check_remainder: cmp r10,0 je NEAR $L$256_enc_msg_x4_out $L$256_enc_msg_x4_loop2: vmovdqa xmm5,xmm0 vpaddd xmm0,xmm0,XMMWORD[one] vpxor xmm5,xmm5,XMMWORD[rcx] vaesenc xmm5,xmm5,XMMWORD[16+rcx] vaesenc xmm5,xmm5,XMMWORD[32+rcx] vaesenc xmm5,xmm5,XMMWORD[48+rcx] vaesenc xmm5,xmm5,XMMWORD[64+rcx] vaesenc xmm5,xmm5,XMMWORD[80+rcx] vaesenc xmm5,xmm5,XMMWORD[96+rcx] vaesenc xmm5,xmm5,XMMWORD[112+rcx] vaesenc xmm5,xmm5,XMMWORD[128+rcx] vaesenc xmm5,xmm5,XMMWORD[144+rcx] vaesenc xmm5,xmm5,XMMWORD[160+rcx] vaesenc xmm5,xmm5,XMMWORD[176+rcx] vaesenc xmm5,xmm5,XMMWORD[192+rcx] vaesenc xmm5,xmm5,XMMWORD[208+rcx] vaesenclast xmm5,xmm5,XMMWORD[224+rcx] vpxor xmm5,xmm5,XMMWORD[rdi] vmovdqu XMMWORD[rsi],xmm5 add rdi,16 add rsi,16 sub r10,1 jne NEAR $L$256_enc_msg_x4_loop2 $L$256_enc_msg_x4_out: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes256gcmsiv_enc_msg_x4: global aes256gcmsiv_enc_msg_x8 ALIGN 16 aes256gcmsiv_enc_msg_x8: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_enc_msg_x8: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] test r8,r8 jnz NEAR $L$256_enc_msg_x8_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$256_enc_msg_x8_start: mov r11,rsp sub r11,16 and r11,-64 mov r10,r8 shr r8,4 shl r10,60 jz NEAR $L$256_enc_msg_x8_start2 add r8,1 $L$256_enc_msg_x8_start2: mov r10,r8 shl r10,61 shr r10,61 vmovdqa xmm1,XMMWORD[rdx] vpor xmm1,xmm1,XMMWORD[OR_MASK] vpaddd xmm0,xmm1,XMMWORD[seven] vmovdqa XMMWORD[r11],xmm0 vpaddd xmm9,xmm1,XMMWORD[one] vpaddd xmm10,xmm1,XMMWORD[two] vpaddd xmm11,xmm1,XMMWORD[three] vpaddd xmm12,xmm1,XMMWORD[four] vpaddd xmm13,xmm1,XMMWORD[five] vpaddd xmm14,xmm1,XMMWORD[six] vmovdqa xmm0,xmm1 shr r8,3 jz NEAR $L$256_enc_msg_x8_check_remainder sub rsi,128 sub rdi,128 $L$256_enc_msg_x8_loop1: add rsi,128 add rdi,128 vmovdqa xmm1,xmm0 vmovdqa xmm2,xmm9 vmovdqa xmm3,xmm10 vmovdqa xmm4,xmm11 vmovdqa xmm5,xmm12 vmovdqa xmm6,xmm13 vmovdqa xmm7,xmm14 vmovdqa xmm8,XMMWORD[r11] vpxor xmm1,xmm1,XMMWORD[rcx] vpxor xmm2,xmm2,XMMWORD[rcx] vpxor xmm3,xmm3,XMMWORD[rcx] vpxor xmm4,xmm4,XMMWORD[rcx] vpxor xmm5,xmm5,XMMWORD[rcx] vpxor xmm6,xmm6,XMMWORD[rcx] vpxor xmm7,xmm7,XMMWORD[rcx] vpxor xmm8,xmm8,XMMWORD[rcx] vmovdqu xmm15,XMMWORD[16+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqa xmm14,XMMWORD[r11] vpaddd xmm14,xmm14,XMMWORD[eight] vmovdqa XMMWORD[r11],xmm14 vmovdqu xmm15,XMMWORD[32+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpsubd xmm14,xmm14,XMMWORD[one] vmovdqu xmm15,XMMWORD[48+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm0,xmm0,XMMWORD[eight] vmovdqu xmm15,XMMWORD[64+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm9,xmm9,XMMWORD[eight] vmovdqu xmm15,XMMWORD[80+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm10,xmm10,XMMWORD[eight] vmovdqu xmm15,XMMWORD[96+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm11,xmm11,XMMWORD[eight] vmovdqu xmm15,XMMWORD[112+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm12,xmm12,XMMWORD[eight] vmovdqu xmm15,XMMWORD[128+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vpaddd xmm13,xmm13,XMMWORD[eight] vmovdqu xmm15,XMMWORD[144+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm15,XMMWORD[160+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm15,XMMWORD[176+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm15,XMMWORD[192+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm15,XMMWORD[208+rcx] vaesenc xmm1,xmm1,xmm15 vaesenc xmm2,xmm2,xmm15 vaesenc xmm3,xmm3,xmm15 vaesenc xmm4,xmm4,xmm15 vaesenc xmm5,xmm5,xmm15 vaesenc xmm6,xmm6,xmm15 vaesenc xmm7,xmm7,xmm15 vaesenc xmm8,xmm8,xmm15 vmovdqu xmm15,XMMWORD[224+rcx] vaesenclast xmm1,xmm1,xmm15 vaesenclast xmm2,xmm2,xmm15 vaesenclast xmm3,xmm3,xmm15 vaesenclast xmm4,xmm4,xmm15 vaesenclast xmm5,xmm5,xmm15 vaesenclast xmm6,xmm6,xmm15 vaesenclast xmm7,xmm7,xmm15 vaesenclast xmm8,xmm8,xmm15 vpxor xmm1,xmm1,XMMWORD[rdi] vpxor xmm2,xmm2,XMMWORD[16+rdi] vpxor xmm3,xmm3,XMMWORD[32+rdi] vpxor xmm4,xmm4,XMMWORD[48+rdi] vpxor xmm5,xmm5,XMMWORD[64+rdi] vpxor xmm6,xmm6,XMMWORD[80+rdi] vpxor xmm7,xmm7,XMMWORD[96+rdi] vpxor xmm8,xmm8,XMMWORD[112+rdi] sub r8,1 vmovdqu XMMWORD[rsi],xmm1 vmovdqu XMMWORD[16+rsi],xmm2 vmovdqu XMMWORD[32+rsi],xmm3 vmovdqu XMMWORD[48+rsi],xmm4 vmovdqu XMMWORD[64+rsi],xmm5 vmovdqu XMMWORD[80+rsi],xmm6 vmovdqu XMMWORD[96+rsi],xmm7 vmovdqu XMMWORD[112+rsi],xmm8 jne NEAR $L$256_enc_msg_x8_loop1 add rsi,128 add rdi,128 $L$256_enc_msg_x8_check_remainder: cmp r10,0 je NEAR $L$256_enc_msg_x8_out $L$256_enc_msg_x8_loop2: vmovdqa xmm1,xmm0 vpaddd xmm0,xmm0,XMMWORD[one] vpxor xmm1,xmm1,XMMWORD[rcx] vaesenc xmm1,xmm1,XMMWORD[16+rcx] vaesenc xmm1,xmm1,XMMWORD[32+rcx] vaesenc xmm1,xmm1,XMMWORD[48+rcx] vaesenc xmm1,xmm1,XMMWORD[64+rcx] vaesenc xmm1,xmm1,XMMWORD[80+rcx] vaesenc xmm1,xmm1,XMMWORD[96+rcx] vaesenc xmm1,xmm1,XMMWORD[112+rcx] vaesenc xmm1,xmm1,XMMWORD[128+rcx] vaesenc xmm1,xmm1,XMMWORD[144+rcx] vaesenc xmm1,xmm1,XMMWORD[160+rcx] vaesenc xmm1,xmm1,XMMWORD[176+rcx] vaesenc xmm1,xmm1,XMMWORD[192+rcx] vaesenc xmm1,xmm1,XMMWORD[208+rcx] vaesenclast xmm1,xmm1,XMMWORD[224+rcx] vpxor xmm1,xmm1,XMMWORD[rdi] vmovdqu XMMWORD[rsi],xmm1 add rdi,16 add rsi,16 sub r10,1 jnz NEAR $L$256_enc_msg_x8_loop2 $L$256_enc_msg_x8_out: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes256gcmsiv_enc_msg_x8: global aes256gcmsiv_dec ALIGN 16 aes256gcmsiv_dec: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_dec: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] mov r9,QWORD[48+rsp] test r9,~15 jnz NEAR $L$256_dec_start mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$256_dec_start: vzeroupper vmovdqa xmm0,XMMWORD[rdx] mov rax,rdx lea rax,[32+rax] lea rcx,[32+rcx] vmovdqu xmm15,XMMWORD[r9*1+rdi] vpor xmm15,xmm15,XMMWORD[OR_MASK] and r9,~15 cmp r9,96 jb NEAR $L$256_dec_loop2 sub r9,96 vmovdqa xmm7,xmm15 vpaddd xmm8,xmm7,XMMWORD[one] vpaddd xmm9,xmm7,XMMWORD[two] vpaddd xmm10,xmm9,XMMWORD[one] vpaddd xmm11,xmm9,XMMWORD[two] vpaddd xmm12,xmm11,XMMWORD[one] vpaddd xmm15,xmm11,XMMWORD[two] vpxor xmm7,xmm7,XMMWORD[r8] vpxor xmm8,xmm8,XMMWORD[r8] vpxor xmm9,xmm9,XMMWORD[r8] vpxor xmm10,xmm10,XMMWORD[r8] vpxor xmm11,xmm11,XMMWORD[r8] vpxor xmm12,xmm12,XMMWORD[r8] vmovdqu xmm4,XMMWORD[16+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[32+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[48+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[64+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[80+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[96+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[112+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[128+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[144+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[160+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[176+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[192+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[208+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[224+r8] vaesenclast xmm7,xmm7,xmm4 vaesenclast xmm8,xmm8,xmm4 vaesenclast xmm9,xmm9,xmm4 vaesenclast xmm10,xmm10,xmm4 vaesenclast xmm11,xmm11,xmm4 vaesenclast xmm12,xmm12,xmm4 vpxor xmm7,xmm7,XMMWORD[rdi] vpxor xmm8,xmm8,XMMWORD[16+rdi] vpxor xmm9,xmm9,XMMWORD[32+rdi] vpxor xmm10,xmm10,XMMWORD[48+rdi] vpxor xmm11,xmm11,XMMWORD[64+rdi] vpxor xmm12,xmm12,XMMWORD[80+rdi] vmovdqu XMMWORD[rsi],xmm7 vmovdqu XMMWORD[16+rsi],xmm8 vmovdqu XMMWORD[32+rsi],xmm9 vmovdqu XMMWORD[48+rsi],xmm10 vmovdqu XMMWORD[64+rsi],xmm11 vmovdqu XMMWORD[80+rsi],xmm12 add rdi,96 add rsi,96 jmp NEAR $L$256_dec_loop1 ALIGN 64 $L$256_dec_loop1: cmp r9,96 jb NEAR $L$256_dec_finish_96 sub r9,96 vmovdqa xmm6,xmm12 vmovdqa XMMWORD[(16-32)+rax],xmm11 vmovdqa XMMWORD[(32-32)+rax],xmm10 vmovdqa XMMWORD[(48-32)+rax],xmm9 vmovdqa XMMWORD[(64-32)+rax],xmm8 vmovdqa XMMWORD[(80-32)+rax],xmm7 vmovdqa xmm7,xmm15 vpaddd xmm8,xmm7,XMMWORD[one] vpaddd xmm9,xmm7,XMMWORD[two] vpaddd xmm10,xmm9,XMMWORD[one] vpaddd xmm11,xmm9,XMMWORD[two] vpaddd xmm12,xmm11,XMMWORD[one] vpaddd xmm15,xmm11,XMMWORD[two] vmovdqa xmm4,XMMWORD[r8] vpxor xmm7,xmm7,xmm4 vpxor xmm8,xmm8,xmm4 vpxor xmm9,xmm9,xmm4 vpxor xmm10,xmm10,xmm4 vpxor xmm11,xmm11,xmm4 vpxor xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[((0-32))+rcx] vpclmulqdq xmm2,xmm6,xmm4,0x11 vpclmulqdq xmm3,xmm6,xmm4,0x00 vpclmulqdq xmm1,xmm6,xmm4,0x01 vpclmulqdq xmm4,xmm6,xmm4,0x10 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[16+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[((-16))+rax] vmovdqu xmm13,XMMWORD[((-16))+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[32+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[rax] vmovdqu xmm13,XMMWORD[rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[48+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[16+rax] vmovdqu xmm13,XMMWORD[16+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[64+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[32+rax] vmovdqu xmm13,XMMWORD[32+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[80+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[96+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[112+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqa xmm6,XMMWORD[((80-32))+rax] vpxor xmm6,xmm6,xmm0 vmovdqu xmm5,XMMWORD[((80-32))+rcx] vpclmulqdq xmm4,xmm6,xmm5,0x01 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x10 vpxor xmm1,xmm1,xmm4 vmovdqu xmm4,XMMWORD[128+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vpsrldq xmm4,xmm1,8 vpxor xmm5,xmm2,xmm4 vpslldq xmm4,xmm1,8 vpxor xmm0,xmm3,xmm4 vmovdqa xmm3,XMMWORD[poly] vmovdqu xmm4,XMMWORD[144+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[160+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[176+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[192+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm4,XMMWORD[208+r8] vaesenc xmm7,xmm7,xmm4 vaesenc xmm8,xmm8,xmm4 vaesenc xmm9,xmm9,xmm4 vaesenc xmm10,xmm10,xmm4 vaesenc xmm11,xmm11,xmm4 vaesenc xmm12,xmm12,xmm4 vmovdqu xmm6,XMMWORD[224+r8] vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vpxor xmm4,xmm6,XMMWORD[rdi] vaesenclast xmm7,xmm7,xmm4 vpxor xmm4,xmm6,XMMWORD[16+rdi] vaesenclast xmm8,xmm8,xmm4 vpxor xmm4,xmm6,XMMWORD[32+rdi] vaesenclast xmm9,xmm9,xmm4 vpxor xmm4,xmm6,XMMWORD[48+rdi] vaesenclast xmm10,xmm10,xmm4 vpxor xmm4,xmm6,XMMWORD[64+rdi] vaesenclast xmm11,xmm11,xmm4 vpxor xmm4,xmm6,XMMWORD[80+rdi] vaesenclast xmm12,xmm12,xmm4 vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vmovdqu XMMWORD[rsi],xmm7 vmovdqu XMMWORD[16+rsi],xmm8 vmovdqu XMMWORD[32+rsi],xmm9 vmovdqu XMMWORD[48+rsi],xmm10 vmovdqu XMMWORD[64+rsi],xmm11 vmovdqu XMMWORD[80+rsi],xmm12 vpxor xmm0,xmm0,xmm5 lea rdi,[96+rdi] lea rsi,[96+rsi] jmp NEAR $L$256_dec_loop1 $L$256_dec_finish_96: vmovdqa xmm6,xmm12 vmovdqa XMMWORD[(16-32)+rax],xmm11 vmovdqa XMMWORD[(32-32)+rax],xmm10 vmovdqa XMMWORD[(48-32)+rax],xmm9 vmovdqa XMMWORD[(64-32)+rax],xmm8 vmovdqa XMMWORD[(80-32)+rax],xmm7 vmovdqu xmm4,XMMWORD[((0-32))+rcx] vpclmulqdq xmm1,xmm6,xmm4,0x10 vpclmulqdq xmm2,xmm6,xmm4,0x11 vpclmulqdq xmm3,xmm6,xmm4,0x00 vpclmulqdq xmm4,xmm6,xmm4,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[((-16))+rax] vmovdqu xmm13,XMMWORD[((-16))+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[rax] vmovdqu xmm13,XMMWORD[rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[16+rax] vmovdqu xmm13,XMMWORD[16+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[32+rax] vmovdqu xmm13,XMMWORD[32+rcx] vpclmulqdq xmm4,xmm6,xmm13,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm13,0x01 vpxor xmm1,xmm1,xmm4 vmovdqu xmm6,XMMWORD[((80-32))+rax] vpxor xmm6,xmm6,xmm0 vmovdqu xmm5,XMMWORD[((80-32))+rcx] vpclmulqdq xmm4,xmm6,xmm5,0x11 vpxor xmm2,xmm2,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x00 vpxor xmm3,xmm3,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x10 vpxor xmm1,xmm1,xmm4 vpclmulqdq xmm4,xmm6,xmm5,0x01 vpxor xmm1,xmm1,xmm4 vpsrldq xmm4,xmm1,8 vpxor xmm5,xmm2,xmm4 vpslldq xmm4,xmm1,8 vpxor xmm0,xmm3,xmm4 vmovdqa xmm3,XMMWORD[poly] vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vpalignr xmm2,xmm0,xmm0,8 vpclmulqdq xmm0,xmm0,xmm3,0x10 vpxor xmm0,xmm2,xmm0 vpxor xmm0,xmm0,xmm5 $L$256_dec_loop2: cmp r9,16 jb NEAR $L$256_dec_out sub r9,16 vmovdqa xmm2,xmm15 vpaddd xmm15,xmm15,XMMWORD[one] vpxor xmm2,xmm2,XMMWORD[r8] vaesenc xmm2,xmm2,XMMWORD[16+r8] vaesenc xmm2,xmm2,XMMWORD[32+r8] vaesenc xmm2,xmm2,XMMWORD[48+r8] vaesenc xmm2,xmm2,XMMWORD[64+r8] vaesenc xmm2,xmm2,XMMWORD[80+r8] vaesenc xmm2,xmm2,XMMWORD[96+r8] vaesenc xmm2,xmm2,XMMWORD[112+r8] vaesenc xmm2,xmm2,XMMWORD[128+r8] vaesenc xmm2,xmm2,XMMWORD[144+r8] vaesenc xmm2,xmm2,XMMWORD[160+r8] vaesenc xmm2,xmm2,XMMWORD[176+r8] vaesenc xmm2,xmm2,XMMWORD[192+r8] vaesenc xmm2,xmm2,XMMWORD[208+r8] vaesenclast xmm2,xmm2,XMMWORD[224+r8] vpxor xmm2,xmm2,XMMWORD[rdi] vmovdqu XMMWORD[rsi],xmm2 add rdi,16 add rsi,16 vpxor xmm0,xmm0,xmm2 vmovdqa xmm1,XMMWORD[((-32))+rcx] call GFMUL jmp NEAR $L$256_dec_loop2 $L$256_dec_out: vmovdqu XMMWORD[rdx],xmm0 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes256gcmsiv_dec: global aes256gcmsiv_kdf ALIGN 16 aes256gcmsiv_kdf: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aes256gcmsiv_kdf: mov rdi,rcx mov rsi,rdx mov rdx,r8 vmovdqa xmm1,XMMWORD[rdx] vmovdqa xmm4,XMMWORD[rdi] vmovdqa xmm11,XMMWORD[and_mask] vmovdqa xmm8,XMMWORD[one] vpshufd xmm4,xmm4,0x90 vpand xmm4,xmm4,xmm11 vpaddd xmm6,xmm4,xmm8 vpaddd xmm7,xmm6,xmm8 vpaddd xmm11,xmm7,xmm8 vpaddd xmm12,xmm11,xmm8 vpaddd xmm13,xmm12,xmm8 vpxor xmm4,xmm4,xmm1 vpxor xmm6,xmm6,xmm1 vpxor xmm7,xmm7,xmm1 vpxor xmm11,xmm11,xmm1 vpxor xmm12,xmm12,xmm1 vpxor xmm13,xmm13,xmm1 vmovdqa xmm1,XMMWORD[16+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[32+rdx] vaesenc xmm4,xmm4,xmm2 vaesenc xmm6,xmm6,xmm2 vaesenc xmm7,xmm7,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vaesenc xmm13,xmm13,xmm2 vmovdqa xmm1,XMMWORD[48+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[64+rdx] vaesenc xmm4,xmm4,xmm2 vaesenc xmm6,xmm6,xmm2 vaesenc xmm7,xmm7,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vaesenc xmm13,xmm13,xmm2 vmovdqa xmm1,XMMWORD[80+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[96+rdx] vaesenc xmm4,xmm4,xmm2 vaesenc xmm6,xmm6,xmm2 vaesenc xmm7,xmm7,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vaesenc xmm13,xmm13,xmm2 vmovdqa xmm1,XMMWORD[112+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[128+rdx] vaesenc xmm4,xmm4,xmm2 vaesenc xmm6,xmm6,xmm2 vaesenc xmm7,xmm7,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vaesenc xmm13,xmm13,xmm2 vmovdqa xmm1,XMMWORD[144+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[160+rdx] vaesenc xmm4,xmm4,xmm2 vaesenc xmm6,xmm6,xmm2 vaesenc xmm7,xmm7,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vaesenc xmm13,xmm13,xmm2 vmovdqa xmm1,XMMWORD[176+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[192+rdx] vaesenc xmm4,xmm4,xmm2 vaesenc xmm6,xmm6,xmm2 vaesenc xmm7,xmm7,xmm2 vaesenc xmm11,xmm11,xmm2 vaesenc xmm12,xmm12,xmm2 vaesenc xmm13,xmm13,xmm2 vmovdqa xmm1,XMMWORD[208+rdx] vaesenc xmm4,xmm4,xmm1 vaesenc xmm6,xmm6,xmm1 vaesenc xmm7,xmm7,xmm1 vaesenc xmm11,xmm11,xmm1 vaesenc xmm12,xmm12,xmm1 vaesenc xmm13,xmm13,xmm1 vmovdqa xmm2,XMMWORD[224+rdx] vaesenclast xmm4,xmm4,xmm2 vaesenclast xmm6,xmm6,xmm2 vaesenclast xmm7,xmm7,xmm2 vaesenclast xmm11,xmm11,xmm2 vaesenclast xmm12,xmm12,xmm2 vaesenclast xmm13,xmm13,xmm2 vmovdqa XMMWORD[rsi],xmm4 vmovdqa XMMWORD[16+rsi],xmm6 vmovdqa XMMWORD[32+rsi],xmm7 vmovdqa XMMWORD[48+rsi],xmm11 vmovdqa XMMWORD[64+rsi],xmm12 vmovdqa XMMWORD[80+rsi],xmm13 mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aes256gcmsiv_kdf: %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret %endif