.code ALIGN 16 aes128_key_expansion proc movdqu xmm1, xmmword ptr [rcx + 0] movdqu xmmword ptr [rdx + 0], xmm1 aeskeygenassist xmm2, xmm1, 1 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 16], xmm1 aeskeygenassist xmm2, xmm1, 2 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 32], xmm1 aeskeygenassist xmm2, xmm1, 4 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 48], xmm1 aeskeygenassist xmm2, xmm1, 8 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 64], xmm1 aeskeygenassist xmm2, xmm1, 16 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 80], xmm1 aeskeygenassist xmm2, xmm1, 32 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 96], xmm1 aeskeygenassist xmm2, xmm1, 64 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 112], xmm1 aeskeygenassist xmm2, xmm1, 128 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 128], xmm1 aeskeygenassist xmm2, xmm1, 27 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 144], xmm1 aeskeygenassist xmm2, xmm1, 54 pshufd xmm2, xmm2, 255 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 vpslldq xmm3, xmm1, 4 pxor xmm1, xmm3 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 160], xmm1 pxor xmm1, xmm1 pxor xmm2, xmm2 pxor xmm3, xmm3 ret aes128_key_expansion endp ALIGN 16 aes128_keyhash_init proc mov r8, 579005069656919567 pinsrq xmm4, r8, 0 mov r8, 283686952306183 pinsrq xmm4, r8, 1 pxor xmm0, xmm0 movdqu xmmword ptr [rdx + 80], xmm0 mov r8, rcx movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pshufb xmm0, xmm4 mov rcx, rdx movdqu xmmword ptr [rcx + 32], xmm0 movdqu xmm0, xmm6 mov rax, r12 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 0], xmm1 movdqu xmm1, xmm6 movdqu xmm2, xmm6 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 16], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 48], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 64], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 96], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 112], xmm1 movdqu xmm6, xmm0 mov r12, rax ret aes128_keyhash_init endp ALIGN 16 aes256_key_expansion proc movdqu xmm1, xmmword ptr [rcx + 0] movdqu xmm3, xmmword ptr [rcx + 16] movdqu xmmword ptr [rdx + 0], xmm1 movdqu xmmword ptr [rdx + 16], xmm3 aeskeygenassist xmm2, xmm3, 1 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 32], xmm1 aeskeygenassist xmm2, xmm1, 0 pshufd xmm2, xmm2, 170 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 pxor xmm3, xmm2 movdqu xmmword ptr [rdx + 48], xmm3 aeskeygenassist xmm2, xmm3, 2 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 64], xmm1 aeskeygenassist xmm2, xmm1, 0 pshufd xmm2, xmm2, 170 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 pxor xmm3, xmm2 movdqu xmmword ptr [rdx + 80], xmm3 aeskeygenassist xmm2, xmm3, 4 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 96], xmm1 aeskeygenassist xmm2, xmm1, 0 pshufd xmm2, xmm2, 170 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 pxor xmm3, xmm2 movdqu xmmword ptr [rdx + 112], xmm3 aeskeygenassist xmm2, xmm3, 8 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 128], xmm1 aeskeygenassist xmm2, xmm1, 0 pshufd xmm2, xmm2, 170 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 pxor xmm3, xmm2 movdqu xmmword ptr [rdx + 144], xmm3 aeskeygenassist xmm2, xmm3, 16 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 160], xmm1 aeskeygenassist xmm2, xmm1, 0 pshufd xmm2, xmm2, 170 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 pxor xmm3, xmm2 movdqu xmmword ptr [rdx + 176], xmm3 aeskeygenassist xmm2, xmm3, 32 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 192], xmm1 aeskeygenassist xmm2, xmm1, 0 pshufd xmm2, xmm2, 170 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 vpslldq xmm4, xmm3, 4 pxor xmm3, xmm4 pxor xmm3, xmm2 movdqu xmmword ptr [rdx + 208], xmm3 aeskeygenassist xmm2, xmm3, 64 pshufd xmm2, xmm2, 255 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 vpslldq xmm4, xmm1, 4 pxor xmm1, xmm4 pxor xmm1, xmm2 movdqu xmmword ptr [rdx + 224], xmm1 pxor xmm1, xmm1 pxor xmm2, xmm2 pxor xmm3, xmm3 pxor xmm4, xmm4 ret aes256_key_expansion endp ALIGN 16 aes256_keyhash_init proc mov r8, 579005069656919567 pinsrq xmm4, r8, 0 mov r8, 283686952306183 pinsrq xmm4, r8, 1 pxor xmm0, xmm0 movdqu xmmword ptr [rdx + 80], xmm0 mov r8, rcx movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pshufb xmm0, xmm4 mov rcx, rdx movdqu xmmword ptr [rcx + 32], xmm0 movdqu xmm0, xmm6 mov rax, r12 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 0], xmm1 movdqu xmm1, xmm6 movdqu xmm2, xmm6 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 16], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 48], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 64], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 96], xmm1 movdqu xmm2, xmm6 movdqu xmm1, xmmword ptr [rcx + 32] movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm6, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 movdqu xmm5, xmm1 pclmulqdq xmm1, xmm2, 16 movdqu xmm3, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 1 movdqu xmm4, xmm1 movdqu xmm1, xmm5 pclmulqdq xmm1, xmm2, 0 pclmulqdq xmm5, xmm2, 17 movdqu xmm2, xmm5 movdqu xmm5, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm4 mov r12, 0 pinsrd xmm1, r12d, 0 pshufd xmm1, xmm1, 14 pxor xmm2, xmm1 movdqu xmm1, xmm3 mov r12, 0 pinsrd xmm1, r12d, 3 pshufd xmm1, xmm1, 79 mov r12, 0 pinsrd xmm4, r12d, 3 pshufd xmm4, xmm4, 79 pxor xmm1, xmm4 pxor xmm1, xmm5 movdqu xmm3, xmm1 psrld xmm3, 31 movdqu xmm4, xmm2 psrld xmm4, 31 pslld xmm1, 1 pslld xmm2, 1 vpslldq xmm5, xmm3, 4 vpslldq xmm4, xmm4, 4 mov r12, 0 pinsrd xmm3, r12d, 0 pshufd xmm3, xmm3, 3 pxor xmm3, xmm4 pxor xmm1, xmm5 pxor xmm2, xmm3 movdqu xmm5, xmm2 pxor xmm2, xmm2 mov r12, 3774873600 pinsrd xmm2, r12d, 3 pclmulqdq xmm1, xmm2, 17 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pxor xmm1, xmm5 pxor xmm1, xmm6 movdqu xmm6, xmm1 movdqu xmm3, xmm1 pxor xmm4, xmm4 pxor xmm5, xmm5 mov r12, 3254779904 pinsrd xmm4, r12d, 3 mov r12, 1 pinsrd xmm4, r12d, 0 mov r12, 2147483648 pinsrd xmm5, r12d, 3 movdqu xmm1, xmm3 movdqu xmm2, xmm1 psrld xmm2, 31 pslld xmm1, 1 vpslldq xmm2, xmm2, 4 pxor xmm1, xmm2 pand xmm3, xmm5 pcmpeqd xmm3, xmm5 pshufd xmm3, xmm3, 255 pand xmm3, xmm4 vpxor xmm1, xmm1, xmm3 movdqu xmmword ptr [rcx + 112], xmm1 movdqu xmm6, xmm0 mov r12, rax ret aes256_keyhash_init endp ALIGN 16 gctr128_bytes proc push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rax, qword ptr [rsp + 272] movdqu xmm7, xmmword ptr [rax + 0] mov rax, rcx mov rbx, r8 mov rsi, rdx mov r13, r9 mov r8, qword ptr [rsp + 264] mov rcx, qword ptr [rsp + 280] mov rbp, rcx imul rbp, 16 mov r12, 579005069656919567 pinsrq xmm8, r12, 0 mov r12, 283686952306183 pinsrq xmm8, r12, 1 mov rdx, rcx shr rdx, 2 and rcx, 3 cmp rdx, 0 jbe L0 mov r9, rax mov r10, rbx pshufb xmm7, xmm8 movdqu xmm9, xmm7 mov rax, 579005069656919567 pinsrq xmm0, rax, 0 mov rax, 579005069656919567 pinsrq xmm0, rax, 1 pshufb xmm9, xmm0 movdqu xmm10, xmm9 pxor xmm3, xmm3 mov rax, 1 pinsrd xmm3, eax, 2 paddd xmm9, xmm3 mov rax, 3 pinsrd xmm3, eax, 2 mov rax, 2 pinsrd xmm3, eax, 0 paddd xmm10, xmm3 pshufb xmm9, xmm8 pshufb xmm10, xmm8 pextrq rdi, xmm7, 0 mov rax, 283686952306183 pinsrq xmm0, rax, 0 mov rax, 579005069656919567 pinsrq xmm0, rax, 1 pxor xmm15, xmm15 mov rax, 4 pinsrd xmm15, eax, 0 mov rax, 4 pinsrd xmm15, eax, 2 jmp L3 ALIGN 16 L2: pinsrq xmm2, rdi, 0 pinsrq xmm12, rdi, 0 pinsrq xmm13, rdi, 0 pinsrq xmm14, rdi, 0 shufpd xmm2, xmm9, 2 shufpd xmm12, xmm9, 0 shufpd xmm13, xmm10, 2 shufpd xmm14, xmm10, 0 pshufb xmm9, xmm0 pshufb xmm10, xmm0 movdqu xmm3, xmmword ptr [r8 + 0] movdqu xmm4, xmmword ptr [r8 + 16] movdqu xmm5, xmmword ptr [r8 + 32] movdqu xmm6, xmmword ptr [r8 + 48] paddd xmm9, xmm15 paddd xmm10, xmm15 pxor xmm2, xmm3 pxor xmm12, xmm3 pxor xmm13, xmm3 pxor xmm14, xmm3 pshufb xmm9, xmm0 pshufb xmm10, xmm0 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 aesenc xmm2, xmm5 aesenc xmm12, xmm5 aesenc xmm13, xmm5 aesenc xmm14, xmm5 aesenc xmm2, xmm6 aesenc xmm12, xmm6 aesenc xmm13, xmm6 aesenc xmm14, xmm6 movdqu xmm3, xmmword ptr [r8 + 64] movdqu xmm4, xmmword ptr [r8 + 80] movdqu xmm5, xmmword ptr [r8 + 96] movdqu xmm6, xmmword ptr [r8 + 112] aesenc xmm2, xmm3 aesenc xmm12, xmm3 aesenc xmm13, xmm3 aesenc xmm14, xmm3 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 aesenc xmm2, xmm5 aesenc xmm12, xmm5 aesenc xmm13, xmm5 aesenc xmm14, xmm5 aesenc xmm2, xmm6 aesenc xmm12, xmm6 aesenc xmm13, xmm6 aesenc xmm14, xmm6 movdqu xmm3, xmmword ptr [r8 + 128] movdqu xmm4, xmmword ptr [r8 + 144] movdqu xmm5, xmmword ptr [r8 + 160] aesenc xmm2, xmm3 aesenc xmm12, xmm3 aesenc xmm13, xmm3 aesenc xmm14, xmm3 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 aesenclast xmm2, xmm5 aesenclast xmm12, xmm5 aesenclast xmm13, xmm5 aesenclast xmm14, xmm5 movdqu xmm7, xmmword ptr [r9 + 0] pxor xmm2, xmm7 movdqu xmm7, xmmword ptr [r9 + 16] pxor xmm12, xmm7 movdqu xmm7, xmmword ptr [r9 + 32] pxor xmm13, xmm7 movdqu xmm7, xmmword ptr [r9 + 48] pxor xmm14, xmm7 movdqu xmmword ptr [r10 + 0], xmm2 movdqu xmmword ptr [r10 + 16], xmm12 movdqu xmmword ptr [r10 + 32], xmm13 movdqu xmmword ptr [r10 + 48], xmm14 sub rdx, 1 add r9, 64 add r10, 64 ALIGN 16 L3: cmp rdx, 0 ja L2 movdqu xmm7, xmm9 pinsrq xmm7, rdi, 0 pshufb xmm7, xmm8 mov rax, r9 mov rbx, r10 jmp L1 L0: L1: mov rdx, 0 mov r9, rax mov r10, rbx pxor xmm4, xmm4 mov r12, 1 pinsrd xmm4, r12d, 0 jmp L5 ALIGN 16 L4: movdqu xmm0, xmm7 pshufb xmm0, xmm8 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm2, xmmword ptr [r9 + 0] pxor xmm2, xmm0 movdqu xmmword ptr [r10 + 0], xmm2 add rdx, 1 add r9, 16 add r10, 16 paddd xmm7, xmm4 ALIGN 16 L5: cmp rdx, rcx jne L4 cmp rsi, rbp jbe L6 movdqu xmm1, xmmword ptr [r13 + 0] movdqu xmm0, xmm7 mov r12, 579005069656919567 pinsrq xmm2, r12, 0 mov r12, 283686952306183 pinsrq xmm2, r12, 1 pshufb xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pxor xmm1, xmm0 movdqu xmmword ptr [r13 + 0], xmm1 jmp L7 L6: L7: pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 ret gctr128_bytes endp ALIGN 16 gctr256_bytes proc push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rax, qword ptr [rsp + 272] movdqu xmm7, xmmword ptr [rax + 0] mov rax, rcx mov rbx, r8 mov rsi, rdx mov r13, r9 mov r8, qword ptr [rsp + 264] mov rcx, qword ptr [rsp + 280] mov rbp, rcx imul rbp, 16 mov r12, 579005069656919567 pinsrq xmm8, r12, 0 mov r12, 283686952306183 pinsrq xmm8, r12, 1 mov rdx, rcx shr rdx, 2 and rcx, 3 cmp rdx, 0 jbe L8 mov r9, rax mov r10, rbx pshufb xmm7, xmm8 movdqu xmm9, xmm7 mov rax, 579005069656919567 pinsrq xmm0, rax, 0 mov rax, 579005069656919567 pinsrq xmm0, rax, 1 pshufb xmm9, xmm0 movdqu xmm10, xmm9 pxor xmm3, xmm3 mov rax, 1 pinsrd xmm3, eax, 2 paddd xmm9, xmm3 mov rax, 3 pinsrd xmm3, eax, 2 mov rax, 2 pinsrd xmm3, eax, 0 paddd xmm10, xmm3 pshufb xmm9, xmm8 pshufb xmm10, xmm8 pextrq rdi, xmm7, 0 mov rax, 283686952306183 pinsrq xmm0, rax, 0 mov rax, 579005069656919567 pinsrq xmm0, rax, 1 pxor xmm15, xmm15 mov rax, 4 pinsrd xmm15, eax, 0 mov rax, 4 pinsrd xmm15, eax, 2 jmp L11 ALIGN 16 L10: pinsrq xmm2, rdi, 0 pinsrq xmm12, rdi, 0 pinsrq xmm13, rdi, 0 pinsrq xmm14, rdi, 0 shufpd xmm2, xmm9, 2 shufpd xmm12, xmm9, 0 shufpd xmm13, xmm10, 2 shufpd xmm14, xmm10, 0 pshufb xmm9, xmm0 pshufb xmm10, xmm0 movdqu xmm3, xmmword ptr [r8 + 0] movdqu xmm4, xmmword ptr [r8 + 16] movdqu xmm5, xmmword ptr [r8 + 32] movdqu xmm6, xmmword ptr [r8 + 48] paddd xmm9, xmm15 paddd xmm10, xmm15 pxor xmm2, xmm3 pxor xmm12, xmm3 pxor xmm13, xmm3 pxor xmm14, xmm3 pshufb xmm9, xmm0 pshufb xmm10, xmm0 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 aesenc xmm2, xmm5 aesenc xmm12, xmm5 aesenc xmm13, xmm5 aesenc xmm14, xmm5 aesenc xmm2, xmm6 aesenc xmm12, xmm6 aesenc xmm13, xmm6 aesenc xmm14, xmm6 movdqu xmm3, xmmword ptr [r8 + 64] movdqu xmm4, xmmword ptr [r8 + 80] movdqu xmm5, xmmword ptr [r8 + 96] movdqu xmm6, xmmword ptr [r8 + 112] aesenc xmm2, xmm3 aesenc xmm12, xmm3 aesenc xmm13, xmm3 aesenc xmm14, xmm3 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 aesenc xmm2, xmm5 aesenc xmm12, xmm5 aesenc xmm13, xmm5 aesenc xmm14, xmm5 aesenc xmm2, xmm6 aesenc xmm12, xmm6 aesenc xmm13, xmm6 aesenc xmm14, xmm6 movdqu xmm3, xmmword ptr [r8 + 128] movdqu xmm4, xmmword ptr [r8 + 144] movdqu xmm5, xmmword ptr [r8 + 160] aesenc xmm2, xmm3 aesenc xmm12, xmm3 aesenc xmm13, xmm3 aesenc xmm14, xmm3 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 movdqu xmm3, xmm5 movdqu xmm4, xmmword ptr [r8 + 176] movdqu xmm5, xmmword ptr [r8 + 192] movdqu xmm6, xmmword ptr [r8 + 208] aesenc xmm2, xmm3 aesenc xmm12, xmm3 aesenc xmm13, xmm3 aesenc xmm14, xmm3 aesenc xmm2, xmm4 aesenc xmm12, xmm4 aesenc xmm13, xmm4 aesenc xmm14, xmm4 aesenc xmm2, xmm5 aesenc xmm12, xmm5 aesenc xmm13, xmm5 aesenc xmm14, xmm5 aesenc xmm2, xmm6 aesenc xmm12, xmm6 aesenc xmm13, xmm6 aesenc xmm14, xmm6 movdqu xmm5, xmmword ptr [r8 + 224] aesenclast xmm2, xmm5 aesenclast xmm12, xmm5 aesenclast xmm13, xmm5 aesenclast xmm14, xmm5 movdqu xmm7, xmmword ptr [r9 + 0] pxor xmm2, xmm7 movdqu xmm7, xmmword ptr [r9 + 16] pxor xmm12, xmm7 movdqu xmm7, xmmword ptr [r9 + 32] pxor xmm13, xmm7 movdqu xmm7, xmmword ptr [r9 + 48] pxor xmm14, xmm7 movdqu xmmword ptr [r10 + 0], xmm2 movdqu xmmword ptr [r10 + 16], xmm12 movdqu xmmword ptr [r10 + 32], xmm13 movdqu xmmword ptr [r10 + 48], xmm14 sub rdx, 1 add r9, 64 add r10, 64 ALIGN 16 L11: cmp rdx, 0 ja L10 movdqu xmm7, xmm9 pinsrq xmm7, rdi, 0 pshufb xmm7, xmm8 mov rax, r9 mov rbx, r10 jmp L9 L8: L9: mov rdx, 0 mov r9, rax mov r10, rbx pxor xmm4, xmm4 mov r12, 1 pinsrd xmm4, r12d, 0 jmp L13 ALIGN 16 L12: movdqu xmm0, xmm7 pshufb xmm0, xmm8 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm2, xmmword ptr [r9 + 0] pxor xmm2, xmm0 movdqu xmmword ptr [r10 + 0], xmm2 add rdx, 1 add r9, 16 add r10, 16 paddd xmm7, xmm4 ALIGN 16 L13: cmp rdx, rcx jne L12 cmp rsi, rbp jbe L14 movdqu xmm1, xmmword ptr [r13 + 0] movdqu xmm0, xmm7 mov r12, 579005069656919567 pinsrq xmm2, r12, 0 mov r12, 283686952306183 pinsrq xmm2, r12, 1 pshufb xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pxor xmm1, xmm0 movdqu xmmword ptr [r13 + 0], xmm1 jmp L15 L14: L15: pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 ret gctr256_bytes endp ALIGN 16 compute_iv_stdcall proc cmp rdx, 12 jne L16 push rdi push rsi mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, qword ptr [rsp + 56] mov r9, qword ptr [rsp + 64] cmp rsi, 12 jne L18 movdqu xmm0, xmmword ptr [r8 + 0] mov rax, 579005069656919567 pinsrq xmm1, rax, 0 mov rax, 283686952306183 pinsrq xmm1, rax, 1 pshufb xmm0, xmm1 mov rax, 1 pinsrd xmm0, eax, 0 movdqu xmmword ptr [rcx + 0], xmm0 jmp L19 L18: mov rax, rcx add r9, 32 mov rbx, r8 mov rcx, rdx imul rcx, 16 mov r10, 579005069656919567 pinsrq xmm9, r10, 0 mov r10, 283686952306183 pinsrq xmm9, r10, 1 pxor xmm8, xmm8 mov r11, rdi jmp L21 ALIGN 16 L20: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L21: cmp rdx, 6 jae L20 cmp rdx, 0 jbe L22 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L24 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L25 L24: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L26 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L28 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L30 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L31 L30: L31: jmp L29 L28: L29: jmp L27 L26: L27: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L25: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L23 L22: L23: mov r15, rsi cmp rsi, rcx jbe L32 movdqu xmm0, xmmword ptr [rbx + 0] mov r10, rsi and r10, 15 cmp r10, 8 jae L34 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L35 L34: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L35: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L33 L32: L33: mov rcx, rax mov r11, 0 mov r13, rsi pxor xmm0, xmm0 mov rax, r11 imul rax, 8 pinsrq xmm0, rax, 1 mov rax, r13 imul rax, 8 pinsrq xmm0, rax, 0 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmmword ptr [rcx + 0], xmm8 L19: pop rsi pop rdi jmp L17 L16: push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, qword ptr [rsp + 264] mov r9, qword ptr [rsp + 272] cmp rsi, 12 jne L36 movdqu xmm0, xmmword ptr [r8 + 0] mov rax, 579005069656919567 pinsrq xmm1, rax, 0 mov rax, 283686952306183 pinsrq xmm1, rax, 1 pshufb xmm0, xmm1 mov rax, 1 pinsrd xmm0, eax, 0 movdqu xmmword ptr [rcx + 0], xmm0 jmp L37 L36: mov rax, rcx add r9, 32 mov rbx, r8 mov rcx, rdx imul rcx, 16 mov r10, 579005069656919567 pinsrq xmm9, r10, 0 mov r10, 283686952306183 pinsrq xmm9, r10, 1 pxor xmm8, xmm8 mov r11, rdi jmp L39 ALIGN 16 L38: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L39: cmp rdx, 6 jae L38 cmp rdx, 0 jbe L40 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L42 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L43 L42: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L44 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L46 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L48 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L49 L48: L49: jmp L47 L46: L47: jmp L45 L44: L45: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L43: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L41 L40: L41: mov r15, rsi cmp rsi, rcx jbe L50 movdqu xmm0, xmmword ptr [rbx + 0] mov r10, rsi and r10, 15 cmp r10, 8 jae L52 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L53 L52: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L53: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L51 L50: L51: mov rcx, rax mov r11, 0 mov r13, rsi pxor xmm0, xmm0 mov rax, r11 imul rax, 8 pinsrq xmm0, rax, 1 mov rax, r13 imul rax, 8 pinsrq xmm0, rax, 0 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmmword ptr [rcx + 0], xmm8 L37: pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 L17: ret compute_iv_stdcall endp ALIGN 16 gcm128_encrypt_opt proc push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, qword ptr [rsp + 264] mov r9, qword ptr [rsp + 272] mov rbp, qword ptr [rsp + 352] mov r13, rcx lea r9, qword ptr [r9 + 32] mov rbx, qword ptr [rsp + 280] mov rcx, rdx imul rcx, 16 mov r10, 579005069656919567 pinsrq xmm9, r10, 0 mov r10, 283686952306183 pinsrq xmm9, r10, 1 pxor xmm8, xmm8 mov r11, rdi jmp L55 ALIGN 16 L54: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L55: cmp rdx, 6 jae L54 cmp rdx, 0 jbe L56 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L58 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L59 L58: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L60 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L62 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L64 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L65 L64: L65: jmp L63 L62: L63: jmp L61 L60: L61: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L59: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L57 L56: L57: mov r15, rsi cmp rsi, rcx jbe L66 movdqu xmm0, xmmword ptr [rbx + 0] mov r10, rsi and r10, 15 cmp r10, 8 jae L68 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L69 L68: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L69: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L67 L66: L67: mov rdi, qword ptr [rsp + 288] mov rsi, qword ptr [rsp + 296] mov rdx, qword ptr [rsp + 304] mov rcx, r13 movdqu xmm0, xmm9 movdqu xmm1, xmmword ptr [r8 + 0] movdqu xmmword ptr [rbp + 0], xmm1 pxor xmm10, xmm10 mov r11, 1 pinsrq xmm10, r11, 0 vpaddd xmm1, xmm1, xmm10 cmp rdx, 0 jne L70 vpshufb xmm1, xmm1, xmm0 movdqu xmmword ptr [rbp + 32], xmm1 jmp L71 L70: movdqu xmmword ptr [rbp + 32], xmm8 add rcx, 128 pextrq rbx, xmm1, 0 and rbx, 255 vpshufb xmm1, xmm1, xmm0 lea r14, qword ptr [rsi + 96] movdqu xmm4, xmmword ptr [rcx + -128] pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 movdqu xmm15, xmmword ptr [rcx + -112] mov r12, rcx sub r12, 96 vpxor xmm9, xmm1, xmm4 add rbx, 6 cmp rbx, 256 jae L72 vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpxor xmm10, xmm10, xmm4 vpaddd xmm12, xmm11, xmm2 vpxor xmm11, xmm11, xmm4 vpaddd xmm13, xmm12, xmm2 vpxor xmm12, xmm12, xmm4 vpaddd xmm14, xmm13, xmm2 vpxor xmm13, xmm13, xmm4 vpaddd xmm1, xmm14, xmm2 vpxor xmm14, xmm14, xmm4 jmp L73 L72: sub rbx, 256 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm4 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm4 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpxor xmm12, xmm12, xmm4 vpshufb xmm14, xmm14, xmm0 vpxor xmm13, xmm13, xmm4 vpshufb xmm1, xmm1, xmm0 vpxor xmm14, xmm14, xmm4 L73: vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -96] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -16] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 16] movdqu xmm3, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 vpxor xmm4, xmm3, xmmword ptr [rdi + 0] vaesenc xmm10, xmm10, xmm15 vpxor xmm5, xmm3, xmmword ptr [rdi + 16] vaesenc xmm11, xmm11, xmm15 vpxor xmm6, xmm3, xmmword ptr [rdi + 32] vaesenc xmm12, xmm12, xmm15 vpxor xmm8, xmm3, xmmword ptr [rdi + 48] vaesenc xmm13, xmm13, xmm15 vpxor xmm2, xmm3, xmmword ptr [rdi + 64] vaesenc xmm14, xmm14, xmm15 vpxor xmm3, xmm3, xmmword ptr [rdi + 80] lea rdi, qword ptr [rdi + 96] vaesenclast xmm9, xmm9, xmm4 vaesenclast xmm10, xmm10, xmm5 vaesenclast xmm11, xmm11, xmm6 vaesenclast xmm12, xmm12, xmm8 vaesenclast xmm13, xmm13, xmm2 vaesenclast xmm14, xmm14, xmm3 movdqu xmmword ptr [rsi + 0], xmm9 movdqu xmmword ptr [rsi + 16], xmm10 movdqu xmmword ptr [rsi + 32], xmm11 movdqu xmmword ptr [rsi + 48], xmm12 movdqu xmmword ptr [rsi + 64], xmm13 movdqu xmmword ptr [rsi + 80], xmm14 lea rsi, qword ptr [rsi + 96] vpshufb xmm8, xmm9, xmm0 vpshufb xmm2, xmm10, xmm0 movdqu xmmword ptr [rbp + 112], xmm8 vpshufb xmm4, xmm11, xmm0 movdqu xmmword ptr [rbp + 96], xmm2 vpshufb xmm5, xmm12, xmm0 movdqu xmmword ptr [rbp + 80], xmm4 vpshufb xmm6, xmm13, xmm0 movdqu xmmword ptr [rbp + 64], xmm5 vpshufb xmm7, xmm14, xmm0 movdqu xmmword ptr [rbp + 48], xmm6 movdqu xmm4, xmmword ptr [rcx + -128] pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 movdqu xmm15, xmmword ptr [rcx + -112] mov r12, rcx sub r12, 96 vpxor xmm9, xmm1, xmm4 add rbx, 6 cmp rbx, 256 jae L74 vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpxor xmm10, xmm10, xmm4 vpaddd xmm12, xmm11, xmm2 vpxor xmm11, xmm11, xmm4 vpaddd xmm13, xmm12, xmm2 vpxor xmm12, xmm12, xmm4 vpaddd xmm14, xmm13, xmm2 vpxor xmm13, xmm13, xmm4 vpaddd xmm1, xmm14, xmm2 vpxor xmm14, xmm14, xmm4 jmp L75 L74: sub rbx, 256 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm4 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm4 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpxor xmm12, xmm12, xmm4 vpshufb xmm14, xmm14, xmm0 vpxor xmm13, xmm13, xmm4 vpshufb xmm1, xmm1, xmm0 vpxor xmm14, xmm14, xmm4 L75: vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -96] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -16] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 16] movdqu xmm3, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 vpxor xmm4, xmm3, xmmword ptr [rdi + 0] vaesenc xmm10, xmm10, xmm15 vpxor xmm5, xmm3, xmmword ptr [rdi + 16] vaesenc xmm11, xmm11, xmm15 vpxor xmm6, xmm3, xmmword ptr [rdi + 32] vaesenc xmm12, xmm12, xmm15 vpxor xmm8, xmm3, xmmword ptr [rdi + 48] vaesenc xmm13, xmm13, xmm15 vpxor xmm2, xmm3, xmmword ptr [rdi + 64] vaesenc xmm14, xmm14, xmm15 vpxor xmm3, xmm3, xmmword ptr [rdi + 80] lea rdi, qword ptr [rdi + 96] vaesenclast xmm9, xmm9, xmm4 vaesenclast xmm10, xmm10, xmm5 vaesenclast xmm11, xmm11, xmm6 vaesenclast xmm12, xmm12, xmm8 vaesenclast xmm13, xmm13, xmm2 vaesenclast xmm14, xmm14, xmm3 movdqu xmmword ptr [rsi + 0], xmm9 movdqu xmmword ptr [rsi + 16], xmm10 movdqu xmmword ptr [rsi + 32], xmm11 movdqu xmmword ptr [rsi + 48], xmm12 movdqu xmmword ptr [rsi + 64], xmm13 movdqu xmmword ptr [rsi + 80], xmm14 lea rsi, qword ptr [rsi + 96] sub rdx, 12 movdqu xmm8, xmmword ptr [rbp + 32] pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vpxor xmm4, xmm4, xmm4 movdqu xmm15, xmmword ptr [rcx + -128] vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpaddd xmm12, xmm11, xmm2 vpaddd xmm13, xmm12, xmm2 vpaddd xmm14, xmm13, xmm2 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rbp + 16], xmm4 jmp L77 ALIGN 16 L76: add rbx, 6 cmp rbx, 256 jb L78 mov r11, 579005069656919567 pinsrq xmm0, r11, 0 mov r11, 283686952306183 pinsrq xmm0, r11, 1 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm15 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm15 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpshufb xmm14, xmm14, xmm0 vpshufb xmm1, xmm1, xmm0 sub rbx, 256 jmp L79 L78: movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm1, xmm2, xmm14 vpxor xmm10, xmm10, xmm15 vpxor xmm11, xmm11, xmm15 L79: movdqu xmmword ptr [rbp + 128], xmm1 vpclmulqdq xmm5, xmm7, xmm3, 16 vpxor xmm12, xmm12, xmm15 movdqu xmm2, xmmword ptr [rcx + -112] vpclmulqdq xmm6, xmm7, xmm3, 1 vaesenc xmm9, xmm9, xmm2 movdqu xmm0, xmmword ptr [rbp + 48] vpxor xmm13, xmm13, xmm15 vpclmulqdq xmm1, xmm7, xmm3, 0 vaesenc xmm10, xmm10, xmm2 vpxor xmm14, xmm14, xmm15 vpclmulqdq xmm7, xmm7, xmm3, 17 vaesenc xmm11, xmm11, xmm2 movdqu xmm3, xmmword ptr [r9 + -16] vaesenc xmm12, xmm12, xmm2 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vaesenc xmm13, xmm13, xmm2 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vaesenc xmm14, xmm14, xmm2 movdqu xmm15, xmmword ptr [rcx + -96] vpclmulqdq xmm2, xmm0, xmm3, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 88] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 80] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 32], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 40], r12 movdqu xmm5, xmmword ptr [r9 + 16] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vaesenc xmm10, xmm10, xmm15 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vaesenc xmm11, xmm11, xmm15 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 72] vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 64] vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 48], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 56], r12 vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 56] vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 48] vpclmulqdq xmm2, xmm0, xmm2, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 64], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 72], r12 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 40] vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 32] vpclmulqdq xmm8, xmm8, xmm3, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 80], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 88], r12 vpxor xmm6, xmm6, xmm5 vaesenc xmm14, xmm14, xmm15 vpxor xmm6, xmm6, xmm1 movdqu xmm15, xmmword ptr [rcx + -16] vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov r11, 13979173243358019584 pinsrq xmm3, r11, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm7, xmm7, xmm8 vaesenc xmm10, xmm10, xmm15 vpxor xmm4, xmm4, xmm5 movbe r13, qword ptr [r14 + 24] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 16] vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 mov qword ptr [rbp + 96], r13 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 104], r12 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm1, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm1 movdqu xmm15, xmmword ptr [rcx + 16] vaesenc xmm10, xmm10, xmm1 vpsrldq xmm6, xmm6, 8 vaesenc xmm11, xmm11, xmm1 vpxor xmm7, xmm7, xmm6 vaesenc xmm12, xmm12, xmm1 vpxor xmm4, xmm4, xmm0 movbe r13, qword ptr [r14 + 8] vaesenc xmm13, xmm13, xmm1 movbe r12, qword ptr [r14 + 0] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 movdqu xmmword ptr [rbp + 16], xmm7 vpalignr xmm8, xmm4, xmm4, 8 vaesenc xmm10, xmm10, xmm15 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm2, xmm1, xmmword ptr [rdi + 0] vaesenc xmm11, xmm11, xmm15 vpxor xmm0, xmm1, xmmword ptr [rdi + 16] vaesenc xmm12, xmm12, xmm15 vpxor xmm5, xmm1, xmmword ptr [rdi + 32] vaesenc xmm13, xmm13, xmm15 vpxor xmm6, xmm1, xmmword ptr [rdi + 48] vaesenc xmm14, xmm14, xmm15 vpxor xmm7, xmm1, xmmword ptr [rdi + 64] vpxor xmm3, xmm1, xmmword ptr [rdi + 80] movdqu xmm1, xmmword ptr [rbp + 128] vaesenclast xmm9, xmm9, xmm2 pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vaesenclast xmm10, xmm10, xmm0 vpaddd xmm0, xmm1, xmm2 mov qword ptr [rbp + 112], r13 lea rdi, qword ptr [rdi + 96] vaesenclast xmm11, xmm11, xmm5 vpaddd xmm5, xmm0, xmm2 mov qword ptr [rbp + 120], r12 lea rsi, qword ptr [rsi + 96] movdqu xmm15, xmmword ptr [rcx + -128] vaesenclast xmm12, xmm12, xmm6 vpaddd xmm6, xmm5, xmm2 vaesenclast xmm13, xmm13, xmm7 vpaddd xmm7, xmm6, xmm2 vaesenclast xmm14, xmm14, xmm3 vpaddd xmm3, xmm7, xmm2 sub rdx, 6 add r14, 96 cmp rdx, 0 jbe L80 movdqu xmmword ptr [rsi + -96], xmm9 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rsi + -80], xmm10 movdqu xmm10, xmm0 movdqu xmmword ptr [rsi + -64], xmm11 movdqu xmm11, xmm5 movdqu xmmword ptr [rsi + -48], xmm12 movdqu xmm12, xmm6 movdqu xmmword ptr [rsi + -32], xmm13 movdqu xmm13, xmm7 movdqu xmmword ptr [rsi + -16], xmm14 movdqu xmm14, xmm3 movdqu xmm7, xmmword ptr [rbp + 32] jmp L81 L80: vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpxor xmm8, xmm8, xmm4 L81: ALIGN 16 L77: cmp rdx, 0 ja L76 movdqu xmm7, xmmword ptr [rbp + 32] movdqu xmmword ptr [rbp + 32], xmm1 pxor xmm4, xmm4 movdqu xmmword ptr [rbp + 16], xmm4 movdqu xmm3, xmmword ptr [r9 + -32] vpclmulqdq xmm1, xmm7, xmm3, 0 vpclmulqdq xmm5, xmm7, xmm3, 16 movdqu xmm0, xmmword ptr [rbp + 48] vpclmulqdq xmm6, xmm7, xmm3, 1 vpclmulqdq xmm7, xmm7, xmm3, 17 movdqu xmm3, xmmword ptr [r9 + -16] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vpclmulqdq xmm2, xmm0, xmm3, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] movdqu xmm5, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vpclmulqdq xmm2, xmm0, xmm2, 17 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vpclmulqdq xmm8, xmm8, xmm3, 17 vpxor xmm6, xmm6, xmm5 vpxor xmm6, xmm6, xmm1 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov rax, 3254779904 pinsrd xmm3, eax, 3 vpxor xmm7, xmm7, xmm8 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 mov r12, 579005069656919567 pinsrq xmm0, r12, 0 mov r12, 283686952306183 pinsrq xmm0, r12, 1 movdqu xmmword ptr [rsi + -96], xmm9 vpshufb xmm9, xmm9, xmm0 vpxor xmm1, xmm1, xmm7 movdqu xmmword ptr [rsi + -80], xmm10 vpshufb xmm10, xmm10, xmm0 movdqu xmmword ptr [rsi + -64], xmm11 vpshufb xmm11, xmm11, xmm0 movdqu xmmword ptr [rsi + -48], xmm12 vpshufb xmm12, xmm12, xmm0 movdqu xmmword ptr [rsi + -32], xmm13 vpshufb xmm13, xmm13, xmm0 movdqu xmmword ptr [rsi + -16], xmm14 vpshufb xmm14, xmm14, xmm0 pxor xmm4, xmm4 movdqu xmm7, xmm14 movdqu xmmword ptr [rbp + 16], xmm4 movdqu xmmword ptr [rbp + 48], xmm13 movdqu xmmword ptr [rbp + 64], xmm12 movdqu xmmword ptr [rbp + 80], xmm11 movdqu xmmword ptr [rbp + 96], xmm10 movdqu xmmword ptr [rbp + 112], xmm9 movdqu xmm3, xmmword ptr [r9 + -32] vpclmulqdq xmm1, xmm7, xmm3, 0 vpclmulqdq xmm5, xmm7, xmm3, 16 movdqu xmm0, xmmword ptr [rbp + 48] vpclmulqdq xmm6, xmm7, xmm3, 1 vpclmulqdq xmm7, xmm7, xmm3, 17 movdqu xmm3, xmmword ptr [r9 + -16] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vpclmulqdq xmm2, xmm0, xmm3, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] movdqu xmm5, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vpclmulqdq xmm2, xmm0, xmm2, 17 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vpclmulqdq xmm8, xmm8, xmm3, 17 vpxor xmm6, xmm6, xmm5 vpxor xmm6, xmm6, xmm1 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov rax, 3254779904 pinsrd xmm3, eax, 3 vpxor xmm7, xmm7, xmm8 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 sub rcx, 128 L71: movdqu xmm11, xmmword ptr [rbp + 32] mov r8, rcx mov rax, qword ptr [rsp + 312] mov rdi, qword ptr [rsp + 320] mov rdx, qword ptr [rsp + 328] mov r14, rdx mov r12, 579005069656919567 pinsrq xmm9, r12, 0 mov r12, 283686952306183 pinsrq xmm9, r12, 1 pshufb xmm11, xmm9 pxor xmm10, xmm10 mov rbx, 1 pinsrd xmm10, ebx, 0 mov r11, rax mov r10, rdi mov rbx, 0 jmp L83 ALIGN 16 L82: movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm2, xmmword ptr [r11 + 0] pxor xmm2, xmm0 movdqu xmmword ptr [r10 + 0], xmm2 add rbx, 1 add r11, 16 add r10, 16 paddd xmm11, xmm10 ALIGN 16 L83: cmp rbx, rdx jne L82 mov r11, rdi jmp L85 ALIGN 16 L84: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L85: cmp rdx, 6 jae L84 cmp rdx, 0 jbe L86 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L88 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L89 L88: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L90 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L92 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L94 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L95 L94: L95: jmp L93 L92: L93: jmp L91 L90: L91: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L89: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L87 L86: L87: add r14, qword ptr [rsp + 304] imul r14, 16 mov r13, qword ptr [rsp + 344] cmp r13, r14 jbe L96 mov rax, qword ptr [rsp + 336] mov r10, r13 and r10, 15 movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm4, xmmword ptr [rax + 0] pxor xmm0, xmm4 movdqu xmmword ptr [rax + 0], xmm0 cmp r10, 8 jae L98 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L99 L98: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L99: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L97 L96: L97: mov r11, r15 pxor xmm0, xmm0 mov rax, r11 imul rax, 8 pinsrq xmm0, rax, 1 mov rax, r13 imul rax, 8 pinsrq xmm0, rax, 0 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmm0, xmmword ptr [rbp + 0] pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pshufb xmm8, xmm9 pxor xmm8, xmm0 mov r15, qword ptr [rsp + 360] movdqu xmmword ptr [r15 + 0], xmm8 pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 ret gcm128_encrypt_opt endp ALIGN 16 gcm256_encrypt_opt proc push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, qword ptr [rsp + 264] mov r9, qword ptr [rsp + 272] mov rbp, qword ptr [rsp + 352] mov r13, rcx lea r9, qword ptr [r9 + 32] mov rbx, qword ptr [rsp + 280] mov rcx, rdx imul rcx, 16 mov r10, 579005069656919567 pinsrq xmm9, r10, 0 mov r10, 283686952306183 pinsrq xmm9, r10, 1 pxor xmm8, xmm8 mov r11, rdi jmp L101 ALIGN 16 L100: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L101: cmp rdx, 6 jae L100 cmp rdx, 0 jbe L102 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L104 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L105 L104: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L106 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L108 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L110 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L111 L110: L111: jmp L109 L108: L109: jmp L107 L106: L107: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L105: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L103 L102: L103: mov r15, rsi cmp rsi, rcx jbe L112 movdqu xmm0, xmmword ptr [rbx + 0] mov r10, rsi and r10, 15 cmp r10, 8 jae L114 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L115 L114: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L115: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L113 L112: L113: mov rdi, qword ptr [rsp + 288] mov rsi, qword ptr [rsp + 296] mov rdx, qword ptr [rsp + 304] mov rcx, r13 movdqu xmm0, xmm9 movdqu xmm1, xmmword ptr [r8 + 0] movdqu xmmword ptr [rbp + 0], xmm1 pxor xmm10, xmm10 mov r11, 1 pinsrq xmm10, r11, 0 vpaddd xmm1, xmm1, xmm10 cmp rdx, 0 jne L116 vpshufb xmm1, xmm1, xmm0 movdqu xmmword ptr [rbp + 32], xmm1 jmp L117 L116: movdqu xmmword ptr [rbp + 32], xmm8 add rcx, 128 pextrq rbx, xmm1, 0 and rbx, 255 vpshufb xmm1, xmm1, xmm0 lea r14, qword ptr [rsi + 96] movdqu xmm4, xmmword ptr [rcx + -128] pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 movdqu xmm15, xmmword ptr [rcx + -112] mov r12, rcx sub r12, 96 vpxor xmm9, xmm1, xmm4 add rbx, 6 cmp rbx, 256 jae L118 vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpxor xmm10, xmm10, xmm4 vpaddd xmm12, xmm11, xmm2 vpxor xmm11, xmm11, xmm4 vpaddd xmm13, xmm12, xmm2 vpxor xmm12, xmm12, xmm4 vpaddd xmm14, xmm13, xmm2 vpxor xmm13, xmm13, xmm4 vpaddd xmm1, xmm14, xmm2 vpxor xmm14, xmm14, xmm4 jmp L119 L118: sub rbx, 256 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm4 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm4 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpxor xmm12, xmm12, xmm4 vpshufb xmm14, xmm14, xmm0 vpxor xmm13, xmm13, xmm4 vpshufb xmm1, xmm1, xmm0 vpxor xmm14, xmm14, xmm4 L119: vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -96] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -16] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 16] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 48] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 80] movdqu xmm3, xmmword ptr [rcx + 96] vaesenc xmm9, xmm9, xmm15 vpxor xmm4, xmm3, xmmword ptr [rdi + 0] vaesenc xmm10, xmm10, xmm15 vpxor xmm5, xmm3, xmmword ptr [rdi + 16] vaesenc xmm11, xmm11, xmm15 vpxor xmm6, xmm3, xmmword ptr [rdi + 32] vaesenc xmm12, xmm12, xmm15 vpxor xmm8, xmm3, xmmword ptr [rdi + 48] vaesenc xmm13, xmm13, xmm15 vpxor xmm2, xmm3, xmmword ptr [rdi + 64] vaesenc xmm14, xmm14, xmm15 vpxor xmm3, xmm3, xmmword ptr [rdi + 80] lea rdi, qword ptr [rdi + 96] vaesenclast xmm9, xmm9, xmm4 vaesenclast xmm10, xmm10, xmm5 vaesenclast xmm11, xmm11, xmm6 vaesenclast xmm12, xmm12, xmm8 vaesenclast xmm13, xmm13, xmm2 vaesenclast xmm14, xmm14, xmm3 movdqu xmmword ptr [rsi + 0], xmm9 movdqu xmmword ptr [rsi + 16], xmm10 movdqu xmmword ptr [rsi + 32], xmm11 movdqu xmmword ptr [rsi + 48], xmm12 movdqu xmmword ptr [rsi + 64], xmm13 movdqu xmmword ptr [rsi + 80], xmm14 lea rsi, qword ptr [rsi + 96] vpshufb xmm8, xmm9, xmm0 vpshufb xmm2, xmm10, xmm0 movdqu xmmword ptr [rbp + 112], xmm8 vpshufb xmm4, xmm11, xmm0 movdqu xmmword ptr [rbp + 96], xmm2 vpshufb xmm5, xmm12, xmm0 movdqu xmmword ptr [rbp + 80], xmm4 vpshufb xmm6, xmm13, xmm0 movdqu xmmword ptr [rbp + 64], xmm5 vpshufb xmm7, xmm14, xmm0 movdqu xmmword ptr [rbp + 48], xmm6 movdqu xmm4, xmmword ptr [rcx + -128] pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 movdqu xmm15, xmmword ptr [rcx + -112] mov r12, rcx sub r12, 96 vpxor xmm9, xmm1, xmm4 add rbx, 6 cmp rbx, 256 jae L120 vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpxor xmm10, xmm10, xmm4 vpaddd xmm12, xmm11, xmm2 vpxor xmm11, xmm11, xmm4 vpaddd xmm13, xmm12, xmm2 vpxor xmm12, xmm12, xmm4 vpaddd xmm14, xmm13, xmm2 vpxor xmm13, xmm13, xmm4 vpaddd xmm1, xmm14, xmm2 vpxor xmm14, xmm14, xmm4 jmp L121 L120: sub rbx, 256 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm4 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm4 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpxor xmm12, xmm12, xmm4 vpshufb xmm14, xmm14, xmm0 vpxor xmm13, xmm13, xmm4 vpshufb xmm1, xmm1, xmm0 vpxor xmm14, xmm14, xmm4 L121: vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -96] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -16] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 16] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 48] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + 80] movdqu xmm3, xmmword ptr [rcx + 96] vaesenc xmm9, xmm9, xmm15 vpxor xmm4, xmm3, xmmword ptr [rdi + 0] vaesenc xmm10, xmm10, xmm15 vpxor xmm5, xmm3, xmmword ptr [rdi + 16] vaesenc xmm11, xmm11, xmm15 vpxor xmm6, xmm3, xmmword ptr [rdi + 32] vaesenc xmm12, xmm12, xmm15 vpxor xmm8, xmm3, xmmword ptr [rdi + 48] vaesenc xmm13, xmm13, xmm15 vpxor xmm2, xmm3, xmmword ptr [rdi + 64] vaesenc xmm14, xmm14, xmm15 vpxor xmm3, xmm3, xmmword ptr [rdi + 80] lea rdi, qword ptr [rdi + 96] vaesenclast xmm9, xmm9, xmm4 vaesenclast xmm10, xmm10, xmm5 vaesenclast xmm11, xmm11, xmm6 vaesenclast xmm12, xmm12, xmm8 vaesenclast xmm13, xmm13, xmm2 vaesenclast xmm14, xmm14, xmm3 movdqu xmmword ptr [rsi + 0], xmm9 movdqu xmmword ptr [rsi + 16], xmm10 movdqu xmmword ptr [rsi + 32], xmm11 movdqu xmmword ptr [rsi + 48], xmm12 movdqu xmmword ptr [rsi + 64], xmm13 movdqu xmmword ptr [rsi + 80], xmm14 lea rsi, qword ptr [rsi + 96] sub rdx, 12 movdqu xmm8, xmmword ptr [rbp + 32] pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vpxor xmm4, xmm4, xmm4 movdqu xmm15, xmmword ptr [rcx + -128] vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpaddd xmm12, xmm11, xmm2 vpaddd xmm13, xmm12, xmm2 vpaddd xmm14, xmm13, xmm2 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rbp + 16], xmm4 jmp L123 ALIGN 16 L122: add rbx, 6 cmp rbx, 256 jb L124 mov r11, 579005069656919567 pinsrq xmm0, r11, 0 mov r11, 283686952306183 pinsrq xmm0, r11, 1 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm15 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm15 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpshufb xmm14, xmm14, xmm0 vpshufb xmm1, xmm1, xmm0 sub rbx, 256 jmp L125 L124: movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm1, xmm2, xmm14 vpxor xmm10, xmm10, xmm15 vpxor xmm11, xmm11, xmm15 L125: movdqu xmmword ptr [rbp + 128], xmm1 vpclmulqdq xmm5, xmm7, xmm3, 16 vpxor xmm12, xmm12, xmm15 movdqu xmm2, xmmword ptr [rcx + -112] vpclmulqdq xmm6, xmm7, xmm3, 1 vaesenc xmm9, xmm9, xmm2 movdqu xmm0, xmmword ptr [rbp + 48] vpxor xmm13, xmm13, xmm15 vpclmulqdq xmm1, xmm7, xmm3, 0 vaesenc xmm10, xmm10, xmm2 vpxor xmm14, xmm14, xmm15 vpclmulqdq xmm7, xmm7, xmm3, 17 vaesenc xmm11, xmm11, xmm2 movdqu xmm3, xmmword ptr [r9 + -16] vaesenc xmm12, xmm12, xmm2 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vaesenc xmm13, xmm13, xmm2 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vaesenc xmm14, xmm14, xmm2 movdqu xmm15, xmmword ptr [rcx + -96] vpclmulqdq xmm2, xmm0, xmm3, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 88] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 80] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 32], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 40], r12 movdqu xmm5, xmmword ptr [r9 + 16] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vaesenc xmm10, xmm10, xmm15 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vaesenc xmm11, xmm11, xmm15 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 72] vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 64] vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 48], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 56], r12 vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 56] vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 48] vpclmulqdq xmm2, xmm0, xmm2, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 64], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 72], r12 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 40] vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 32] vpclmulqdq xmm8, xmm8, xmm3, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 80], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 88], r12 vpxor xmm6, xmm6, xmm5 vaesenc xmm14, xmm14, xmm15 vpxor xmm6, xmm6, xmm1 movdqu xmm15, xmmword ptr [rcx + -16] vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov r11, 13979173243358019584 pinsrq xmm3, r11, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm7, xmm7, xmm8 vaesenc xmm10, xmm10, xmm15 vpxor xmm4, xmm4, xmm5 movbe r13, qword ptr [r14 + 24] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 16] vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 mov qword ptr [rbp + 96], r13 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 104], r12 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm1, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm1 movdqu xmm15, xmmword ptr [rcx + 16] vaesenc xmm10, xmm10, xmm1 vpsrldq xmm6, xmm6, 8 vaesenc xmm11, xmm11, xmm1 vpxor xmm7, xmm7, xmm6 vaesenc xmm12, xmm12, xmm1 vpxor xmm4, xmm4, xmm0 movbe r13, qword ptr [r14 + 8] vaesenc xmm13, xmm13, xmm1 movbe r12, qword ptr [r14 + 0] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 vaesenc xmm9, xmm9, xmm1 vaesenc xmm10, xmm10, xmm1 vaesenc xmm11, xmm11, xmm1 vaesenc xmm12, xmm12, xmm1 vaesenc xmm13, xmm13, xmm1 movdqu xmm15, xmmword ptr [rcx + 48] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 vaesenc xmm9, xmm9, xmm1 vaesenc xmm10, xmm10, xmm1 vaesenc xmm11, xmm11, xmm1 vaesenc xmm12, xmm12, xmm1 vaesenc xmm13, xmm13, xmm1 movdqu xmm15, xmmword ptr [rcx + 80] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 96] vaesenc xmm9, xmm9, xmm15 movdqu xmmword ptr [rbp + 16], xmm7 vpalignr xmm8, xmm4, xmm4, 8 vaesenc xmm10, xmm10, xmm15 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm2, xmm1, xmmword ptr [rdi + 0] vaesenc xmm11, xmm11, xmm15 vpxor xmm0, xmm1, xmmword ptr [rdi + 16] vaesenc xmm12, xmm12, xmm15 vpxor xmm5, xmm1, xmmword ptr [rdi + 32] vaesenc xmm13, xmm13, xmm15 vpxor xmm6, xmm1, xmmword ptr [rdi + 48] vaesenc xmm14, xmm14, xmm15 vpxor xmm7, xmm1, xmmword ptr [rdi + 64] vpxor xmm3, xmm1, xmmword ptr [rdi + 80] movdqu xmm1, xmmword ptr [rbp + 128] vaesenclast xmm9, xmm9, xmm2 pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vaesenclast xmm10, xmm10, xmm0 vpaddd xmm0, xmm1, xmm2 mov qword ptr [rbp + 112], r13 lea rdi, qword ptr [rdi + 96] vaesenclast xmm11, xmm11, xmm5 vpaddd xmm5, xmm0, xmm2 mov qword ptr [rbp + 120], r12 lea rsi, qword ptr [rsi + 96] movdqu xmm15, xmmword ptr [rcx + -128] vaesenclast xmm12, xmm12, xmm6 vpaddd xmm6, xmm5, xmm2 vaesenclast xmm13, xmm13, xmm7 vpaddd xmm7, xmm6, xmm2 vaesenclast xmm14, xmm14, xmm3 vpaddd xmm3, xmm7, xmm2 sub rdx, 6 add r14, 96 cmp rdx, 0 jbe L126 movdqu xmmword ptr [rsi + -96], xmm9 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rsi + -80], xmm10 movdqu xmm10, xmm0 movdqu xmmword ptr [rsi + -64], xmm11 movdqu xmm11, xmm5 movdqu xmmword ptr [rsi + -48], xmm12 movdqu xmm12, xmm6 movdqu xmmword ptr [rsi + -32], xmm13 movdqu xmm13, xmm7 movdqu xmmword ptr [rsi + -16], xmm14 movdqu xmm14, xmm3 movdqu xmm7, xmmword ptr [rbp + 32] jmp L127 L126: vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpxor xmm8, xmm8, xmm4 L127: ALIGN 16 L123: cmp rdx, 0 ja L122 movdqu xmm7, xmmword ptr [rbp + 32] movdqu xmmword ptr [rbp + 32], xmm1 pxor xmm4, xmm4 movdqu xmmword ptr [rbp + 16], xmm4 movdqu xmm3, xmmword ptr [r9 + -32] vpclmulqdq xmm1, xmm7, xmm3, 0 vpclmulqdq xmm5, xmm7, xmm3, 16 movdqu xmm0, xmmword ptr [rbp + 48] vpclmulqdq xmm6, xmm7, xmm3, 1 vpclmulqdq xmm7, xmm7, xmm3, 17 movdqu xmm3, xmmword ptr [r9 + -16] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vpclmulqdq xmm2, xmm0, xmm3, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] movdqu xmm5, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vpclmulqdq xmm2, xmm0, xmm2, 17 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vpclmulqdq xmm8, xmm8, xmm3, 17 vpxor xmm6, xmm6, xmm5 vpxor xmm6, xmm6, xmm1 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov rax, 3254779904 pinsrd xmm3, eax, 3 vpxor xmm7, xmm7, xmm8 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 mov r12, 579005069656919567 pinsrq xmm0, r12, 0 mov r12, 283686952306183 pinsrq xmm0, r12, 1 movdqu xmmword ptr [rsi + -96], xmm9 vpshufb xmm9, xmm9, xmm0 vpxor xmm1, xmm1, xmm7 movdqu xmmword ptr [rsi + -80], xmm10 vpshufb xmm10, xmm10, xmm0 movdqu xmmword ptr [rsi + -64], xmm11 vpshufb xmm11, xmm11, xmm0 movdqu xmmword ptr [rsi + -48], xmm12 vpshufb xmm12, xmm12, xmm0 movdqu xmmword ptr [rsi + -32], xmm13 vpshufb xmm13, xmm13, xmm0 movdqu xmmword ptr [rsi + -16], xmm14 vpshufb xmm14, xmm14, xmm0 pxor xmm4, xmm4 movdqu xmm7, xmm14 movdqu xmmword ptr [rbp + 16], xmm4 movdqu xmmword ptr [rbp + 48], xmm13 movdqu xmmword ptr [rbp + 64], xmm12 movdqu xmmword ptr [rbp + 80], xmm11 movdqu xmmword ptr [rbp + 96], xmm10 movdqu xmmword ptr [rbp + 112], xmm9 movdqu xmm3, xmmword ptr [r9 + -32] vpclmulqdq xmm1, xmm7, xmm3, 0 vpclmulqdq xmm5, xmm7, xmm3, 16 movdqu xmm0, xmmword ptr [rbp + 48] vpclmulqdq xmm6, xmm7, xmm3, 1 vpclmulqdq xmm7, xmm7, xmm3, 17 movdqu xmm3, xmmword ptr [r9 + -16] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vpclmulqdq xmm2, xmm0, xmm3, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] movdqu xmm5, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vpclmulqdq xmm2, xmm0, xmm2, 17 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vpclmulqdq xmm8, xmm8, xmm3, 17 vpxor xmm6, xmm6, xmm5 vpxor xmm6, xmm6, xmm1 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov rax, 3254779904 pinsrd xmm3, eax, 3 vpxor xmm7, xmm7, xmm8 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 sub rcx, 128 L117: movdqu xmm11, xmmword ptr [rbp + 32] mov r8, rcx mov rax, qword ptr [rsp + 312] mov rdi, qword ptr [rsp + 320] mov rdx, qword ptr [rsp + 328] mov r14, rdx mov r12, 579005069656919567 pinsrq xmm9, r12, 0 mov r12, 283686952306183 pinsrq xmm9, r12, 1 pshufb xmm11, xmm9 pxor xmm10, xmm10 mov rbx, 1 pinsrd xmm10, ebx, 0 mov r11, rax mov r10, rdi mov rbx, 0 jmp L129 ALIGN 16 L128: movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm2, xmmword ptr [r11 + 0] pxor xmm2, xmm0 movdqu xmmword ptr [r10 + 0], xmm2 add rbx, 1 add r11, 16 add r10, 16 paddd xmm11, xmm10 ALIGN 16 L129: cmp rbx, rdx jne L128 mov r11, rdi jmp L131 ALIGN 16 L130: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L131: cmp rdx, 6 jae L130 cmp rdx, 0 jbe L132 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L134 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L135 L134: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L136 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L138 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L140 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L141 L140: L141: jmp L139 L138: L139: jmp L137 L136: L137: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L135: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L133 L132: L133: add r14, qword ptr [rsp + 304] imul r14, 16 mov r13, qword ptr [rsp + 344] cmp r13, r14 jbe L142 mov rax, qword ptr [rsp + 336] mov r10, r13 and r10, 15 movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm4, xmmword ptr [rax + 0] pxor xmm0, xmm4 movdqu xmmword ptr [rax + 0], xmm0 cmp r10, 8 jae L144 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L145 L144: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L145: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L143 L142: L143: mov r11, r15 pxor xmm0, xmm0 mov rax, r11 imul rax, 8 pinsrq xmm0, rax, 1 mov rax, r13 imul rax, 8 pinsrq xmm0, rax, 0 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmm0, xmmword ptr [rbp + 0] pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pshufb xmm8, xmm9 pxor xmm8, xmm0 mov r15, qword ptr [rsp + 360] movdqu xmmword ptr [r15 + 0], xmm8 pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 ret gcm256_encrypt_opt endp ALIGN 16 gcm128_decrypt_opt proc push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, qword ptr [rsp + 264] mov r9, qword ptr [rsp + 272] mov rbp, qword ptr [rsp + 352] mov r13, rcx lea r9, qword ptr [r9 + 32] mov rbx, qword ptr [rsp + 280] mov rcx, rdx imul rcx, 16 mov r10, 579005069656919567 pinsrq xmm9, r10, 0 mov r10, 283686952306183 pinsrq xmm9, r10, 1 pxor xmm8, xmm8 mov r11, rdi jmp L147 ALIGN 16 L146: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L147: cmp rdx, 6 jae L146 cmp rdx, 0 jbe L148 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L150 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L151 L150: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L152 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L154 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L156 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L157 L156: L157: jmp L155 L154: L155: jmp L153 L152: L153: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L151: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L149 L148: L149: mov r15, rsi cmp rsi, rcx jbe L158 movdqu xmm0, xmmword ptr [rbx + 0] mov r10, rsi and r10, 15 cmp r10, 8 jae L160 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L161 L160: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L161: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L159 L158: L159: mov rdi, qword ptr [rsp + 288] mov rsi, qword ptr [rsp + 296] mov rdx, qword ptr [rsp + 304] mov rcx, r13 movdqu xmm0, xmm9 movdqu xmm1, xmmword ptr [r8 + 0] movdqu xmmword ptr [rbp + 0], xmm1 pxor xmm10, xmm10 mov r11, 1 pinsrq xmm10, r11, 0 vpaddd xmm1, xmm1, xmm10 cmp rdx, 0 jne L162 vpshufb xmm1, xmm1, xmm0 movdqu xmmword ptr [rbp + 32], xmm1 jmp L163 L162: movdqu xmmword ptr [rbp + 32], xmm8 add rcx, 128 pextrq rbx, xmm1, 0 and rbx, 255 vpshufb xmm1, xmm1, xmm0 lea r14, qword ptr [rdi + 96] movdqu xmm8, xmmword ptr [rbp + 32] movdqu xmm7, xmmword ptr [rdi + 80] movdqu xmm4, xmmword ptr [rdi + 64] movdqu xmm5, xmmword ptr [rdi + 48] movdqu xmm6, xmmword ptr [rdi + 32] vpshufb xmm7, xmm7, xmm0 movdqu xmm2, xmmword ptr [rdi + 16] vpshufb xmm4, xmm4, xmm0 movdqu xmm3, xmmword ptr [rdi + 0] vpshufb xmm5, xmm5, xmm0 movdqu xmmword ptr [rbp + 48], xmm4 vpshufb xmm6, xmm6, xmm0 movdqu xmmword ptr [rbp + 64], xmm5 vpshufb xmm2, xmm2, xmm0 movdqu xmmword ptr [rbp + 80], xmm6 vpshufb xmm3, xmm3, xmm0 movdqu xmmword ptr [rbp + 96], xmm2 movdqu xmmword ptr [rbp + 112], xmm3 pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vpxor xmm4, xmm4, xmm4 movdqu xmm15, xmmword ptr [rcx + -128] vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpaddd xmm12, xmm11, xmm2 vpaddd xmm13, xmm12, xmm2 vpaddd xmm14, xmm13, xmm2 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rbp + 16], xmm4 cmp rdx, 6 jne L164 sub r14, 96 jmp L165 L164: L165: jmp L167 ALIGN 16 L166: add rbx, 6 cmp rbx, 256 jb L168 mov r11, 579005069656919567 pinsrq xmm0, r11, 0 mov r11, 283686952306183 pinsrq xmm0, r11, 1 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm15 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm15 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpshufb xmm14, xmm14, xmm0 vpshufb xmm1, xmm1, xmm0 sub rbx, 256 jmp L169 L168: movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm1, xmm2, xmm14 vpxor xmm10, xmm10, xmm15 vpxor xmm11, xmm11, xmm15 L169: movdqu xmmword ptr [rbp + 128], xmm1 vpclmulqdq xmm5, xmm7, xmm3, 16 vpxor xmm12, xmm12, xmm15 movdqu xmm2, xmmword ptr [rcx + -112] vpclmulqdq xmm6, xmm7, xmm3, 1 vaesenc xmm9, xmm9, xmm2 movdqu xmm0, xmmword ptr [rbp + 48] vpxor xmm13, xmm13, xmm15 vpclmulqdq xmm1, xmm7, xmm3, 0 vaesenc xmm10, xmm10, xmm2 vpxor xmm14, xmm14, xmm15 vpclmulqdq xmm7, xmm7, xmm3, 17 vaesenc xmm11, xmm11, xmm2 movdqu xmm3, xmmword ptr [r9 + -16] vaesenc xmm12, xmm12, xmm2 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vaesenc xmm13, xmm13, xmm2 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vaesenc xmm14, xmm14, xmm2 movdqu xmm15, xmmword ptr [rcx + -96] vpclmulqdq xmm2, xmm0, xmm3, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 88] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 80] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 32], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 40], r12 movdqu xmm5, xmmword ptr [r9 + 16] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vaesenc xmm10, xmm10, xmm15 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vaesenc xmm11, xmm11, xmm15 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 72] vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 64] vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 48], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 56], r12 vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 56] vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 48] vpclmulqdq xmm2, xmm0, xmm2, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 64], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 72], r12 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 40] vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 32] vpclmulqdq xmm8, xmm8, xmm3, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 80], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 88], r12 vpxor xmm6, xmm6, xmm5 vaesenc xmm14, xmm14, xmm15 vpxor xmm6, xmm6, xmm1 movdqu xmm15, xmmword ptr [rcx + -16] vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov r11, 13979173243358019584 pinsrq xmm3, r11, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm7, xmm7, xmm8 vaesenc xmm10, xmm10, xmm15 vpxor xmm4, xmm4, xmm5 movbe r13, qword ptr [r14 + 24] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 16] vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 mov qword ptr [rbp + 96], r13 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 104], r12 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm1, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm1 movdqu xmm15, xmmword ptr [rcx + 16] vaesenc xmm10, xmm10, xmm1 vpsrldq xmm6, xmm6, 8 vaesenc xmm11, xmm11, xmm1 vpxor xmm7, xmm7, xmm6 vaesenc xmm12, xmm12, xmm1 vpxor xmm4, xmm4, xmm0 movbe r13, qword ptr [r14 + 8] vaesenc xmm13, xmm13, xmm1 movbe r12, qword ptr [r14 + 0] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 movdqu xmmword ptr [rbp + 16], xmm7 vpalignr xmm8, xmm4, xmm4, 8 vaesenc xmm10, xmm10, xmm15 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm2, xmm1, xmmword ptr [rdi + 0] vaesenc xmm11, xmm11, xmm15 vpxor xmm0, xmm1, xmmword ptr [rdi + 16] vaesenc xmm12, xmm12, xmm15 vpxor xmm5, xmm1, xmmword ptr [rdi + 32] vaesenc xmm13, xmm13, xmm15 vpxor xmm6, xmm1, xmmword ptr [rdi + 48] vaesenc xmm14, xmm14, xmm15 vpxor xmm7, xmm1, xmmword ptr [rdi + 64] vpxor xmm3, xmm1, xmmword ptr [rdi + 80] movdqu xmm1, xmmword ptr [rbp + 128] vaesenclast xmm9, xmm9, xmm2 pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vaesenclast xmm10, xmm10, xmm0 vpaddd xmm0, xmm1, xmm2 mov qword ptr [rbp + 112], r13 lea rdi, qword ptr [rdi + 96] vaesenclast xmm11, xmm11, xmm5 vpaddd xmm5, xmm0, xmm2 mov qword ptr [rbp + 120], r12 lea rsi, qword ptr [rsi + 96] movdqu xmm15, xmmword ptr [rcx + -128] vaesenclast xmm12, xmm12, xmm6 vpaddd xmm6, xmm5, xmm2 vaesenclast xmm13, xmm13, xmm7 vpaddd xmm7, xmm6, xmm2 vaesenclast xmm14, xmm14, xmm3 vpaddd xmm3, xmm7, xmm2 sub rdx, 6 cmp rdx, 6 jbe L170 add r14, 96 jmp L171 L170: L171: cmp rdx, 0 jbe L172 movdqu xmmword ptr [rsi + -96], xmm9 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rsi + -80], xmm10 movdqu xmm10, xmm0 movdqu xmmword ptr [rsi + -64], xmm11 movdqu xmm11, xmm5 movdqu xmmword ptr [rsi + -48], xmm12 movdqu xmm12, xmm6 movdqu xmmword ptr [rsi + -32], xmm13 movdqu xmm13, xmm7 movdqu xmmword ptr [rsi + -16], xmm14 movdqu xmm14, xmm3 movdqu xmm7, xmmword ptr [rbp + 32] jmp L173 L172: vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpxor xmm8, xmm8, xmm4 L173: ALIGN 16 L167: cmp rdx, 0 ja L166 movdqu xmmword ptr [rbp + 32], xmm1 movdqu xmmword ptr [rsi + -96], xmm9 movdqu xmmword ptr [rsi + -80], xmm10 movdqu xmmword ptr [rsi + -64], xmm11 movdqu xmmword ptr [rsi + -48], xmm12 movdqu xmmword ptr [rsi + -32], xmm13 movdqu xmmword ptr [rsi + -16], xmm14 sub rcx, 128 L163: movdqu xmm11, xmmword ptr [rbp + 32] mov r8, rcx mov rax, qword ptr [rsp + 312] mov rdi, qword ptr [rsp + 320] mov rdx, qword ptr [rsp + 328] mov r14, rdx mov r12, 579005069656919567 pinsrq xmm9, r12, 0 mov r12, 283686952306183 pinsrq xmm9, r12, 1 pshufb xmm11, xmm9 mov rbx, rdi mov r12, rdx mov rdi, rax mov r11, rdi jmp L175 ALIGN 16 L174: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L175: cmp rdx, 6 jae L174 cmp rdx, 0 jbe L176 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L178 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L179 L178: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L180 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L182 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L184 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L185 L184: L185: jmp L183 L182: L183: jmp L181 L180: L181: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L179: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L177 L176: L177: mov rdi, rbx mov rdx, r12 pxor xmm10, xmm10 mov rbx, 1 pinsrd xmm10, ebx, 0 mov r11, rax mov r10, rdi mov rbx, 0 jmp L187 ALIGN 16 L186: movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm2, xmmword ptr [r11 + 0] pxor xmm2, xmm0 movdqu xmmword ptr [r10 + 0], xmm2 add rbx, 1 add r11, 16 add r10, 16 paddd xmm11, xmm10 ALIGN 16 L187: cmp rbx, rdx jne L186 add r14, qword ptr [rsp + 304] imul r14, 16 mov r13, qword ptr [rsp + 344] cmp r13, r14 jbe L188 mov rax, qword ptr [rsp + 336] mov r10, r13 and r10, 15 movdqu xmm0, xmmword ptr [rax + 0] movdqu xmm10, xmm0 cmp r10, 8 jae L190 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L191 L190: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L191: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pxor xmm10, xmm0 movdqu xmmword ptr [rax + 0], xmm10 jmp L189 L188: L189: mov r11, r15 pxor xmm0, xmm0 mov rax, r11 imul rax, 8 pinsrq xmm0, rax, 1 mov rax, r13 imul rax, 8 pinsrq xmm0, rax, 0 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmm0, xmmword ptr [rbp + 0] pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pshufb xmm8, xmm9 pxor xmm8, xmm0 mov r15, qword ptr [rsp + 360] movdqu xmm0, xmmword ptr [r15 + 0] pcmpeqd xmm0, xmm8 pextrq rdx, xmm0, 0 sub rdx, 18446744073709551615 mov rax, 0 adc rax, 0 pextrq rdx, xmm0, 1 sub rdx, 18446744073709551615 mov rdx, 0 adc rdx, 0 add rax, rdx mov rcx, rax pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 mov rax, rcx ret gcm128_decrypt_opt endp ALIGN 16 gcm256_decrypt_opt proc push r15 push r14 push r13 push r12 push rsi push rdi push rbp push rbx pextrq rax, xmm15, 0 push rax pextrq rax, xmm15, 1 push rax pextrq rax, xmm14, 0 push rax pextrq rax, xmm14, 1 push rax pextrq rax, xmm13, 0 push rax pextrq rax, xmm13, 1 push rax pextrq rax, xmm12, 0 push rax pextrq rax, xmm12, 1 push rax pextrq rax, xmm11, 0 push rax pextrq rax, xmm11, 1 push rax pextrq rax, xmm10, 0 push rax pextrq rax, xmm10, 1 push rax pextrq rax, xmm9, 0 push rax pextrq rax, xmm9, 1 push rax pextrq rax, xmm8, 0 push rax pextrq rax, xmm8, 1 push rax pextrq rax, xmm7, 0 push rax pextrq rax, xmm7, 1 push rax pextrq rax, xmm6, 0 push rax pextrq rax, xmm6, 1 push rax mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, qword ptr [rsp + 264] mov r9, qword ptr [rsp + 272] mov rbp, qword ptr [rsp + 352] mov r13, rcx lea r9, qword ptr [r9 + 32] mov rbx, qword ptr [rsp + 280] mov rcx, rdx imul rcx, 16 mov r10, 579005069656919567 pinsrq xmm9, r10, 0 mov r10, 283686952306183 pinsrq xmm9, r10, 1 pxor xmm8, xmm8 mov r11, rdi jmp L193 ALIGN 16 L192: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L193: cmp rdx, 6 jae L192 cmp rdx, 0 jbe L194 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L196 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L197 L196: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L198 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L200 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L202 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L203 L202: L203: jmp L201 L200: L201: jmp L199 L198: L199: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L197: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L195 L194: L195: mov r15, rsi cmp rsi, rcx jbe L204 movdqu xmm0, xmmword ptr [rbx + 0] mov r10, rsi and r10, 15 cmp r10, 8 jae L206 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L207 L206: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L207: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L205 L204: L205: mov rdi, qword ptr [rsp + 288] mov rsi, qword ptr [rsp + 296] mov rdx, qword ptr [rsp + 304] mov rcx, r13 movdqu xmm0, xmm9 movdqu xmm1, xmmword ptr [r8 + 0] movdqu xmmword ptr [rbp + 0], xmm1 pxor xmm10, xmm10 mov r11, 1 pinsrq xmm10, r11, 0 vpaddd xmm1, xmm1, xmm10 cmp rdx, 0 jne L208 vpshufb xmm1, xmm1, xmm0 movdqu xmmword ptr [rbp + 32], xmm1 jmp L209 L208: movdqu xmmword ptr [rbp + 32], xmm8 add rcx, 128 pextrq rbx, xmm1, 0 and rbx, 255 vpshufb xmm1, xmm1, xmm0 lea r14, qword ptr [rdi + 96] movdqu xmm8, xmmword ptr [rbp + 32] movdqu xmm7, xmmword ptr [rdi + 80] movdqu xmm4, xmmword ptr [rdi + 64] movdqu xmm5, xmmword ptr [rdi + 48] movdqu xmm6, xmmword ptr [rdi + 32] vpshufb xmm7, xmm7, xmm0 movdqu xmm2, xmmword ptr [rdi + 16] vpshufb xmm4, xmm4, xmm0 movdqu xmm3, xmmword ptr [rdi + 0] vpshufb xmm5, xmm5, xmm0 movdqu xmmword ptr [rbp + 48], xmm4 vpshufb xmm6, xmm6, xmm0 movdqu xmmword ptr [rbp + 64], xmm5 vpshufb xmm2, xmm2, xmm0 movdqu xmmword ptr [rbp + 80], xmm6 vpshufb xmm3, xmm3, xmm0 movdqu xmmword ptr [rbp + 96], xmm2 movdqu xmmword ptr [rbp + 112], xmm3 pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vpxor xmm4, xmm4, xmm4 movdqu xmm15, xmmword ptr [rcx + -128] vpaddd xmm10, xmm1, xmm2 vpaddd xmm11, xmm10, xmm2 vpaddd xmm12, xmm11, xmm2 vpaddd xmm13, xmm12, xmm2 vpaddd xmm14, xmm13, xmm2 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rbp + 16], xmm4 cmp rdx, 6 jne L210 sub r14, 96 jmp L211 L210: L211: jmp L213 ALIGN 16 L212: add rbx, 6 cmp rbx, 256 jb L214 mov r11, 579005069656919567 pinsrq xmm0, r11, 0 mov r11, 283686952306183 pinsrq xmm0, r11, 1 vpshufb xmm6, xmm1, xmm0 pxor xmm5, xmm5 mov r11, 1 pinsrq xmm5, r11, 0 vpaddd xmm10, xmm6, xmm5 pxor xmm5, xmm5 mov r11, 2 pinsrq xmm5, r11, 0 vpaddd xmm11, xmm6, xmm5 movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm12, xmm10, xmm5 vpshufb xmm10, xmm10, xmm0 vpaddd xmm13, xmm11, xmm5 vpshufb xmm11, xmm11, xmm0 vpxor xmm10, xmm10, xmm15 vpaddd xmm14, xmm12, xmm5 vpshufb xmm12, xmm12, xmm0 vpxor xmm11, xmm11, xmm15 vpaddd xmm1, xmm13, xmm5 vpshufb xmm13, xmm13, xmm0 vpshufb xmm14, xmm14, xmm0 vpshufb xmm1, xmm1, xmm0 sub rbx, 256 jmp L215 L214: movdqu xmm3, xmmword ptr [r9 + -32] vpaddd xmm1, xmm2, xmm14 vpxor xmm10, xmm10, xmm15 vpxor xmm11, xmm11, xmm15 L215: movdqu xmmword ptr [rbp + 128], xmm1 vpclmulqdq xmm5, xmm7, xmm3, 16 vpxor xmm12, xmm12, xmm15 movdqu xmm2, xmmword ptr [rcx + -112] vpclmulqdq xmm6, xmm7, xmm3, 1 vaesenc xmm9, xmm9, xmm2 movdqu xmm0, xmmword ptr [rbp + 48] vpxor xmm13, xmm13, xmm15 vpclmulqdq xmm1, xmm7, xmm3, 0 vaesenc xmm10, xmm10, xmm2 vpxor xmm14, xmm14, xmm15 vpclmulqdq xmm7, xmm7, xmm3, 17 vaesenc xmm11, xmm11, xmm2 movdqu xmm3, xmmword ptr [r9 + -16] vaesenc xmm12, xmm12, xmm2 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm3, 0 vpxor xmm8, xmm8, xmm4 vaesenc xmm13, xmm13, xmm2 vpxor xmm4, xmm1, xmm5 vpclmulqdq xmm1, xmm0, xmm3, 16 vaesenc xmm14, xmm14, xmm2 movdqu xmm15, xmmword ptr [rcx + -96] vpclmulqdq xmm2, xmm0, xmm3, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpclmulqdq xmm3, xmm0, xmm3, 17 movdqu xmm0, xmmword ptr [rbp + 64] vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 88] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 80] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 32], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 40], r12 movdqu xmm5, xmmword ptr [r9 + 16] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -80] vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm0, xmm5, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm5, 16 vaesenc xmm10, xmm10, xmm15 vpxor xmm7, xmm7, xmm3 vpclmulqdq xmm3, xmm0, xmm5, 1 vaesenc xmm11, xmm11, xmm15 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [rbp + 80] vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -64] vpxor xmm6, xmm6, xmm2 vpclmulqdq xmm2, xmm0, xmm1, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm1, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 72] vpxor xmm7, xmm7, xmm5 vpclmulqdq xmm5, xmm0, xmm1, 1 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 64] vpclmulqdq xmm1, xmm0, xmm1, 17 movdqu xmm0, xmmword ptr [rbp + 96] vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 48], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 56], r12 vpxor xmm4, xmm4, xmm2 movdqu xmm2, xmmword ptr [r9 + 64] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -48] vpxor xmm6, xmm6, xmm3 vpclmulqdq xmm3, xmm0, xmm2, 0 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm0, xmm2, 16 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 56] vpxor xmm7, xmm7, xmm1 vpclmulqdq xmm1, xmm0, xmm2, 1 vpxor xmm8, xmm8, xmmword ptr [rbp + 112] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 48] vpclmulqdq xmm2, xmm0, xmm2, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 64], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 72], r12 vpxor xmm4, xmm4, xmm3 movdqu xmm3, xmmword ptr [r9 + 80] vaesenc xmm14, xmm14, xmm15 movdqu xmm15, xmmword ptr [rcx + -32] vpxor xmm6, xmm6, xmm5 vpclmulqdq xmm5, xmm8, xmm3, 16 vaesenc xmm9, xmm9, xmm15 vpxor xmm6, xmm6, xmm1 vpclmulqdq xmm1, xmm8, xmm3, 1 vaesenc xmm10, xmm10, xmm15 movbe r13, qword ptr [r14 + 40] vpxor xmm7, xmm7, xmm2 vpclmulqdq xmm2, xmm8, xmm3, 0 vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 32] vpclmulqdq xmm8, xmm8, xmm3, 17 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 80], r13 vaesenc xmm13, xmm13, xmm15 mov qword ptr [rbp + 88], r12 vpxor xmm6, xmm6, xmm5 vaesenc xmm14, xmm14, xmm15 vpxor xmm6, xmm6, xmm1 movdqu xmm15, xmmword ptr [rcx + -16] vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm2 pxor xmm3, xmm3 mov r11, 13979173243358019584 pinsrq xmm3, r11, 1 vaesenc xmm9, xmm9, xmm15 vpxor xmm7, xmm7, xmm8 vaesenc xmm10, xmm10, xmm15 vpxor xmm4, xmm4, xmm5 movbe r13, qword ptr [r14 + 24] vaesenc xmm11, xmm11, xmm15 movbe r12, qword ptr [r14 + 16] vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 mov qword ptr [rbp + 96], r13 vaesenc xmm12, xmm12, xmm15 mov qword ptr [rbp + 104], r12 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 movdqu xmm1, xmmword ptr [rcx + 0] vaesenc xmm9, xmm9, xmm1 movdqu xmm15, xmmword ptr [rcx + 16] vaesenc xmm10, xmm10, xmm1 vpsrldq xmm6, xmm6, 8 vaesenc xmm11, xmm11, xmm1 vpxor xmm7, xmm7, xmm6 vaesenc xmm12, xmm12, xmm1 vpxor xmm4, xmm4, xmm0 movbe r13, qword ptr [r14 + 8] vaesenc xmm13, xmm13, xmm1 movbe r12, qword ptr [r14 + 0] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 32] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 vaesenc xmm9, xmm9, xmm1 vaesenc xmm10, xmm10, xmm1 vaesenc xmm11, xmm11, xmm1 vaesenc xmm12, xmm12, xmm1 vaesenc xmm13, xmm13, xmm1 movdqu xmm15, xmmword ptr [rcx + 48] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 64] vaesenc xmm9, xmm9, xmm15 vaesenc xmm10, xmm10, xmm15 vaesenc xmm11, xmm11, xmm15 vaesenc xmm12, xmm12, xmm15 vaesenc xmm13, xmm13, xmm15 vaesenc xmm14, xmm14, xmm15 vaesenc xmm9, xmm9, xmm1 vaesenc xmm10, xmm10, xmm1 vaesenc xmm11, xmm11, xmm1 vaesenc xmm12, xmm12, xmm1 vaesenc xmm13, xmm13, xmm1 movdqu xmm15, xmmword ptr [rcx + 80] vaesenc xmm14, xmm14, xmm1 movdqu xmm1, xmmword ptr [rcx + 96] vaesenc xmm9, xmm9, xmm15 movdqu xmmword ptr [rbp + 16], xmm7 vpalignr xmm8, xmm4, xmm4, 8 vaesenc xmm10, xmm10, xmm15 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm2, xmm1, xmmword ptr [rdi + 0] vaesenc xmm11, xmm11, xmm15 vpxor xmm0, xmm1, xmmword ptr [rdi + 16] vaesenc xmm12, xmm12, xmm15 vpxor xmm5, xmm1, xmmword ptr [rdi + 32] vaesenc xmm13, xmm13, xmm15 vpxor xmm6, xmm1, xmmword ptr [rdi + 48] vaesenc xmm14, xmm14, xmm15 vpxor xmm7, xmm1, xmmword ptr [rdi + 64] vpxor xmm3, xmm1, xmmword ptr [rdi + 80] movdqu xmm1, xmmword ptr [rbp + 128] vaesenclast xmm9, xmm9, xmm2 pxor xmm2, xmm2 mov r11, 72057594037927936 pinsrq xmm2, r11, 1 vaesenclast xmm10, xmm10, xmm0 vpaddd xmm0, xmm1, xmm2 mov qword ptr [rbp + 112], r13 lea rdi, qword ptr [rdi + 96] vaesenclast xmm11, xmm11, xmm5 vpaddd xmm5, xmm0, xmm2 mov qword ptr [rbp + 120], r12 lea rsi, qword ptr [rsi + 96] movdqu xmm15, xmmword ptr [rcx + -128] vaesenclast xmm12, xmm12, xmm6 vpaddd xmm6, xmm5, xmm2 vaesenclast xmm13, xmm13, xmm7 vpaddd xmm7, xmm6, xmm2 vaesenclast xmm14, xmm14, xmm3 vpaddd xmm3, xmm7, xmm2 sub rdx, 6 cmp rdx, 6 jbe L216 add r14, 96 jmp L217 L216: L217: cmp rdx, 0 jbe L218 movdqu xmmword ptr [rsi + -96], xmm9 vpxor xmm9, xmm1, xmm15 movdqu xmmword ptr [rsi + -80], xmm10 movdqu xmm10, xmm0 movdqu xmmword ptr [rsi + -64], xmm11 movdqu xmm11, xmm5 movdqu xmmword ptr [rsi + -48], xmm12 movdqu xmm12, xmm6 movdqu xmmword ptr [rsi + -32], xmm13 movdqu xmm13, xmm7 movdqu xmmword ptr [rsi + -16], xmm14 movdqu xmm14, xmm3 movdqu xmm7, xmmword ptr [rbp + 32] jmp L219 L218: vpxor xmm8, xmm8, xmmword ptr [rbp + 16] vpxor xmm8, xmm8, xmm4 L219: ALIGN 16 L213: cmp rdx, 0 ja L212 movdqu xmmword ptr [rbp + 32], xmm1 movdqu xmmword ptr [rsi + -96], xmm9 movdqu xmmword ptr [rsi + -80], xmm10 movdqu xmmword ptr [rsi + -64], xmm11 movdqu xmmword ptr [rsi + -48], xmm12 movdqu xmmword ptr [rsi + -32], xmm13 movdqu xmmword ptr [rsi + -16], xmm14 sub rcx, 128 L209: movdqu xmm11, xmmword ptr [rbp + 32] mov r8, rcx mov rax, qword ptr [rsp + 312] mov rdi, qword ptr [rsp + 320] mov rdx, qword ptr [rsp + 328] mov r14, rdx mov r12, 579005069656919567 pinsrq xmm9, r12, 0 mov r12, 283686952306183 pinsrq xmm9, r12, 1 pshufb xmm11, xmm9 mov rbx, rdi mov r12, rdx mov rdi, rax mov r11, rdi jmp L221 ALIGN 16 L220: add r11, 80 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 80] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 add r11, 96 sub rdx, 6 ALIGN 16 L221: cmp rdx, 6 jae L220 cmp rdx, 0 jbe L222 mov r10, rdx sub r10, 1 imul r10, 16 add r11, r10 movdqu xmm5, xmmword ptr [r9 + -32] movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 cmp rdx, 1 jne L224 vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 jmp L225 L224: sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 movdqu xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + -16] vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 2 je L226 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 16] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 3 je L228 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 32] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 cmp rdx, 4 je L230 sub r11, 16 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm0, xmmword ptr [r11 + 0] pshufb xmm0, xmm9 vpxor xmm4, xmm4, xmm1 movdqu xmm1, xmmword ptr [r9 + 64] vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 movdqu xmm5, xmm1 jmp L231 L230: L231: jmp L229 L228: L229: jmp L227 L226: L227: vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 vpxor xmm4, xmm4, xmm1 vpxor xmm6, xmm6, xmm2 vpxor xmm6, xmm6, xmm3 vpxor xmm7, xmm7, xmm5 L225: pxor xmm3, xmm3 mov r10, 3254779904 pinsrd xmm3, r10d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 jmp L223 L222: L223: mov rdi, rbx mov rdx, r12 pxor xmm10, xmm10 mov rbx, 1 pinsrd xmm10, ebx, 0 mov r11, rax mov r10, rdi mov rbx, 0 jmp L233 ALIGN 16 L232: movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 movdqu xmm2, xmmword ptr [r11 + 0] pxor xmm2, xmm0 movdqu xmmword ptr [r10 + 0], xmm2 add rbx, 1 add r11, 16 add r10, 16 paddd xmm11, xmm10 ALIGN 16 L233: cmp rbx, rdx jne L232 add r14, qword ptr [rsp + 304] imul r14, 16 mov r13, qword ptr [rsp + 344] cmp r13, r14 jbe L234 mov rax, qword ptr [rsp + 336] mov r10, r13 and r10, 15 movdqu xmm0, xmmword ptr [rax + 0] movdqu xmm10, xmm0 cmp r10, 8 jae L236 mov rcx, 0 pinsrq xmm0, rcx, 1 mov rcx, r10 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 0 and rcx, r11 pinsrq xmm0, rcx, 0 jmp L237 L236: mov rcx, r10 sub rcx, 8 shl rcx, 3 mov r11, 1 shl r11, cl sub r11, 1 pextrq rcx, xmm0, 1 and rcx, r11 pinsrq xmm0, rcx, 1 L237: pshufb xmm0, xmm9 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmm0, xmm11 pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pxor xmm10, xmm0 movdqu xmmword ptr [rax + 0], xmm10 jmp L235 L234: L235: mov r11, r15 pxor xmm0, xmm0 mov rax, r11 imul rax, 8 pinsrq xmm0, rax, 1 mov rax, r13 imul rax, 8 pinsrq xmm0, rax, 0 movdqu xmm5, xmmword ptr [r9 + -32] vpxor xmm0, xmm8, xmm0 vpclmulqdq xmm1, xmm0, xmm5, 0 vpclmulqdq xmm2, xmm0, xmm5, 16 vpclmulqdq xmm3, xmm0, xmm5, 1 vpclmulqdq xmm5, xmm0, xmm5, 17 movdqu xmm4, xmm1 vpxor xmm6, xmm2, xmm3 movdqu xmm7, xmm5 pxor xmm3, xmm3 mov r11, 3254779904 pinsrd xmm3, r11d, 3 vpslldq xmm5, xmm6, 8 vpxor xmm4, xmm4, xmm5 vpalignr xmm0, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpsrldq xmm6, xmm6, 8 vpxor xmm7, xmm7, xmm6 vpxor xmm4, xmm4, xmm0 vpalignr xmm8, xmm4, xmm4, 8 vpclmulqdq xmm4, xmm4, xmm3, 16 vpxor xmm8, xmm8, xmm7 vpxor xmm8, xmm8, xmm4 movdqu xmm0, xmmword ptr [rbp + 0] pshufb xmm0, xmm9 movdqu xmm2, xmmword ptr [r8 + 0] pxor xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 16] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 32] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 48] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 64] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 80] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 96] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 112] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 128] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 144] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 160] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 176] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 192] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 208] aesenc xmm0, xmm2 movdqu xmm2, xmmword ptr [r8 + 224] aesenclast xmm0, xmm2 pxor xmm2, xmm2 pshufb xmm8, xmm9 pxor xmm8, xmm0 mov r15, qword ptr [rsp + 360] movdqu xmm0, xmmword ptr [r15 + 0] pcmpeqd xmm0, xmm8 pextrq rdx, xmm0, 0 sub rdx, 18446744073709551615 mov rax, 0 adc rax, 0 pextrq rdx, xmm0, 1 sub rdx, 18446744073709551615 mov rdx, 0 adc rdx, 0 add rax, rdx mov rcx, rax pop rax pinsrq xmm6, rax, 1 pop rax pinsrq xmm6, rax, 0 pop rax pinsrq xmm7, rax, 1 pop rax pinsrq xmm7, rax, 0 pop rax pinsrq xmm8, rax, 1 pop rax pinsrq xmm8, rax, 0 pop rax pinsrq xmm9, rax, 1 pop rax pinsrq xmm9, rax, 0 pop rax pinsrq xmm10, rax, 1 pop rax pinsrq xmm10, rax, 0 pop rax pinsrq xmm11, rax, 1 pop rax pinsrq xmm11, rax, 0 pop rax pinsrq xmm12, rax, 1 pop rax pinsrq xmm12, rax, 0 pop rax pinsrq xmm13, rax, 1 pop rax pinsrq xmm13, rax, 0 pop rax pinsrq xmm14, rax, 1 pop rax pinsrq xmm14, rax, 0 pop rax pinsrq xmm15, rax, 1 pop rax pinsrq xmm15, rax, 0 pop rbx pop rbp pop rdi pop rsi pop r12 pop r13 pop r14 pop r15 mov rax, rcx ret gcm256_decrypt_opt endp end