; This file is generated from a similarly-named Perl script in the BoringSSL ; source tree. Do not edit by hand. %include "ring_core_generated/prefix_symbols_nasm.inc" %ifidn __OUTPUT_FORMAT__, win32 %ifidn __OUTPUT_FORMAT__,obj section code use32 class=code align=64 %elifidn __OUTPUT_FORMAT__,win32 $@feat.00 equ 1 section .text code align=64 %else section .text code %endif global _ChaCha20_ctr32 align 16 _ChaCha20_ctr32: L$_ChaCha20_ctr32_begin: push ebp push ebx push esi push edi xor eax,eax cmp eax,DWORD [28+esp] je NEAR L$000no_data call L$pic_point L$pic_point: pop eax lea ebp,[_OPENSSL_ia32cap_P] test DWORD [ebp],16777216 jz NEAR L$001x86 test DWORD [4+ebp],512 jz NEAR L$001x86 jmp NEAR L$ssse3_shortcut L$001x86: mov esi,DWORD [32+esp] mov edi,DWORD [36+esp] sub esp,132 mov eax,DWORD [esi] mov ebx,DWORD [4+esi] mov ecx,DWORD [8+esi] mov edx,DWORD [12+esi] mov DWORD [80+esp],eax mov DWORD [84+esp],ebx mov DWORD [88+esp],ecx mov DWORD [92+esp],edx mov eax,DWORD [16+esi] mov ebx,DWORD [20+esi] mov ecx,DWORD [24+esi] mov edx,DWORD [28+esi] mov DWORD [96+esp],eax mov DWORD [100+esp],ebx mov DWORD [104+esp],ecx mov DWORD [108+esp],edx mov eax,DWORD [edi] mov ebx,DWORD [4+edi] mov ecx,DWORD [8+edi] mov edx,DWORD [12+edi] sub eax,1 mov DWORD [112+esp],eax mov DWORD [116+esp],ebx mov DWORD [120+esp],ecx mov DWORD [124+esp],edx jmp NEAR L$002entry align 16 L$003outer_loop: mov DWORD [156+esp],ebx mov DWORD [152+esp],eax mov DWORD [160+esp],ecx L$002entry: mov eax,1634760805 mov DWORD [4+esp],857760878 mov DWORD [8+esp],2036477234 mov DWORD [12+esp],1797285236 mov ebx,DWORD [84+esp] mov ebp,DWORD [88+esp] mov ecx,DWORD [104+esp] mov esi,DWORD [108+esp] mov edx,DWORD [116+esp] mov edi,DWORD [120+esp] mov DWORD [20+esp],ebx mov DWORD [24+esp],ebp mov DWORD [40+esp],ecx mov DWORD [44+esp],esi mov DWORD [52+esp],edx mov DWORD [56+esp],edi mov ebx,DWORD [92+esp] mov edi,DWORD [124+esp] mov edx,DWORD [112+esp] mov ebp,DWORD [80+esp] mov ecx,DWORD [96+esp] mov esi,DWORD [100+esp] add edx,1 mov DWORD [28+esp],ebx mov DWORD [60+esp],edi mov DWORD [112+esp],edx mov ebx,10 jmp NEAR L$004loop align 16 L$004loop: add eax,ebp mov DWORD [128+esp],ebx mov ebx,ebp xor edx,eax rol edx,16 add ecx,edx xor ebx,ecx mov edi,DWORD [52+esp] rol ebx,12 mov ebp,DWORD [20+esp] add eax,ebx xor edx,eax mov DWORD [esp],eax rol edx,8 mov eax,DWORD [4+esp] add ecx,edx mov DWORD [48+esp],edx xor ebx,ecx add eax,ebp rol ebx,7 xor edi,eax mov DWORD [32+esp],ecx rol edi,16 mov DWORD [16+esp],ebx add esi,edi mov ecx,DWORD [40+esp] xor ebp,esi mov edx,DWORD [56+esp] rol ebp,12 mov ebx,DWORD [24+esp] add eax,ebp xor edi,eax mov DWORD [4+esp],eax rol edi,8 mov eax,DWORD [8+esp] add esi,edi mov DWORD [52+esp],edi xor ebp,esi add eax,ebx rol ebp,7 xor edx,eax mov DWORD [36+esp],esi rol edx,16 mov DWORD [20+esp],ebp add ecx,edx mov esi,DWORD [44+esp] xor ebx,ecx mov edi,DWORD [60+esp] rol ebx,12 mov ebp,DWORD [28+esp] add eax,ebx xor edx,eax mov DWORD [8+esp],eax rol edx,8 mov eax,DWORD [12+esp] add ecx,edx mov DWORD [56+esp],edx xor ebx,ecx add eax,ebp rol ebx,7 xor edi,eax rol edi,16 mov DWORD [24+esp],ebx add esi,edi xor ebp,esi rol ebp,12 mov ebx,DWORD [20+esp] add eax,ebp xor edi,eax mov DWORD [12+esp],eax rol edi,8 mov eax,DWORD [esp] add esi,edi mov edx,edi xor ebp,esi add eax,ebx rol ebp,7 xor edx,eax rol edx,16 mov DWORD [28+esp],ebp add ecx,edx xor ebx,ecx mov edi,DWORD [48+esp] rol ebx,12 mov ebp,DWORD [24+esp] add eax,ebx xor edx,eax mov DWORD [esp],eax rol edx,8 mov eax,DWORD [4+esp] add ecx,edx mov DWORD [60+esp],edx xor ebx,ecx add eax,ebp rol ebx,7 xor edi,eax mov DWORD [40+esp],ecx rol edi,16 mov DWORD [20+esp],ebx add esi,edi mov ecx,DWORD [32+esp] xor ebp,esi mov edx,DWORD [52+esp] rol ebp,12 mov ebx,DWORD [28+esp] add eax,ebp xor edi,eax mov DWORD [4+esp],eax rol edi,8 mov eax,DWORD [8+esp] add esi,edi mov DWORD [48+esp],edi xor ebp,esi add eax,ebx rol ebp,7 xor edx,eax mov DWORD [44+esp],esi rol edx,16 mov DWORD [24+esp],ebp add ecx,edx mov esi,DWORD [36+esp] xor ebx,ecx mov edi,DWORD [56+esp] rol ebx,12 mov ebp,DWORD [16+esp] add eax,ebx xor edx,eax mov DWORD [8+esp],eax rol edx,8 mov eax,DWORD [12+esp] add ecx,edx mov DWORD [52+esp],edx xor ebx,ecx add eax,ebp rol ebx,7 xor edi,eax rol edi,16 mov DWORD [28+esp],ebx add esi,edi xor ebp,esi mov edx,DWORD [48+esp] rol ebp,12 mov ebx,DWORD [128+esp] add eax,ebp xor edi,eax mov DWORD [12+esp],eax rol edi,8 mov eax,DWORD [esp] add esi,edi mov DWORD [56+esp],edi xor ebp,esi rol ebp,7 dec ebx jnz NEAR L$004loop mov ebx,DWORD [160+esp] add eax,1634760805 add ebp,DWORD [80+esp] add ecx,DWORD [96+esp] add esi,DWORD [100+esp] cmp ebx,64 jb NEAR L$005tail mov ebx,DWORD [156+esp] add edx,DWORD [112+esp] add edi,DWORD [120+esp] xor eax,DWORD [ebx] xor ebp,DWORD [16+ebx] mov DWORD [esp],eax mov eax,DWORD [152+esp] xor ecx,DWORD [32+ebx] xor esi,DWORD [36+ebx] xor edx,DWORD [48+ebx] xor edi,DWORD [56+ebx] mov DWORD [16+eax],ebp mov DWORD [32+eax],ecx mov DWORD [36+eax],esi mov DWORD [48+eax],edx mov DWORD [56+eax],edi mov ebp,DWORD [4+esp] mov ecx,DWORD [8+esp] mov esi,DWORD [12+esp] mov edx,DWORD [20+esp] mov edi,DWORD [24+esp] add ebp,857760878 add ecx,2036477234 add esi,1797285236 add edx,DWORD [84+esp] add edi,DWORD [88+esp] xor ebp,DWORD [4+ebx] xor ecx,DWORD [8+ebx] xor esi,DWORD [12+ebx] xor edx,DWORD [20+ebx] xor edi,DWORD [24+ebx] mov DWORD [4+eax],ebp mov DWORD [8+eax],ecx mov DWORD [12+eax],esi mov DWORD [20+eax],edx mov DWORD [24+eax],edi mov ebp,DWORD [28+esp] mov ecx,DWORD [40+esp] mov esi,DWORD [44+esp] mov edx,DWORD [52+esp] mov edi,DWORD [60+esp] add ebp,DWORD [92+esp] add ecx,DWORD [104+esp] add esi,DWORD [108+esp] add edx,DWORD [116+esp] add edi,DWORD [124+esp] xor ebp,DWORD [28+ebx] xor ecx,DWORD [40+ebx] xor esi,DWORD [44+ebx] xor edx,DWORD [52+ebx] xor edi,DWORD [60+ebx] lea ebx,[64+ebx] mov DWORD [28+eax],ebp mov ebp,DWORD [esp] mov DWORD [40+eax],ecx mov ecx,DWORD [160+esp] mov DWORD [44+eax],esi mov DWORD [52+eax],edx mov DWORD [60+eax],edi mov DWORD [eax],ebp lea eax,[64+eax] sub ecx,64 jnz NEAR L$003outer_loop jmp NEAR L$006done L$005tail: add edx,DWORD [112+esp] add edi,DWORD [120+esp] mov DWORD [esp],eax mov DWORD [16+esp],ebp mov DWORD [32+esp],ecx mov DWORD [36+esp],esi mov DWORD [48+esp],edx mov DWORD [56+esp],edi mov ebp,DWORD [4+esp] mov ecx,DWORD [8+esp] mov esi,DWORD [12+esp] mov edx,DWORD [20+esp] mov edi,DWORD [24+esp] add ebp,857760878 add ecx,2036477234 add esi,1797285236 add edx,DWORD [84+esp] add edi,DWORD [88+esp] mov DWORD [4+esp],ebp mov DWORD [8+esp],ecx mov DWORD [12+esp],esi mov DWORD [20+esp],edx mov DWORD [24+esp],edi mov ebp,DWORD [28+esp] mov ecx,DWORD [40+esp] mov esi,DWORD [44+esp] mov edx,DWORD [52+esp] mov edi,DWORD [60+esp] add ebp,DWORD [92+esp] add ecx,DWORD [104+esp] add esi,DWORD [108+esp] add edx,DWORD [116+esp] add edi,DWORD [124+esp] mov DWORD [28+esp],ebp mov ebp,DWORD [156+esp] mov DWORD [40+esp],ecx mov ecx,DWORD [152+esp] mov DWORD [44+esp],esi xor esi,esi mov DWORD [52+esp],edx mov DWORD [60+esp],edi xor eax,eax xor edx,edx L$007tail_loop: mov al,BYTE [ebp*1+esi] mov dl,BYTE [esi*1+esp] lea esi,[1+esi] xor al,dl mov BYTE [esi*1+ecx-1],al dec ebx jnz NEAR L$007tail_loop L$006done: add esp,132 L$000no_data: pop edi pop esi pop ebx pop ebp ret align 16 __ChaCha20_ssse3: push ebp push ebx push esi push edi L$ssse3_shortcut: mov edi,DWORD [20+esp] mov esi,DWORD [24+esp] mov ecx,DWORD [28+esp] mov edx,DWORD [32+esp] mov ebx,DWORD [36+esp] mov ebp,esp sub esp,524 and esp,-64 mov DWORD [512+esp],ebp lea eax,[(L$ssse3_data-L$pic_point)+eax] movdqu xmm3,[ebx] cmp ecx,256 jb NEAR L$0081x mov DWORD [516+esp],edx mov DWORD [520+esp],ebx sub ecx,256 lea ebp,[384+esp] movdqu xmm7,[edx] pshufd xmm0,xmm3,0 pshufd xmm1,xmm3,85 pshufd xmm2,xmm3,170 pshufd xmm3,xmm3,255 paddd xmm0,[48+eax] pshufd xmm4,xmm7,0 pshufd xmm5,xmm7,85 psubd xmm0,[64+eax] pshufd xmm6,xmm7,170 pshufd xmm7,xmm7,255 movdqa [64+ebp],xmm0 movdqa [80+ebp],xmm1 movdqa [96+ebp],xmm2 movdqa [112+ebp],xmm3 movdqu xmm3,[16+edx] movdqa [ebp-64],xmm4 movdqa [ebp-48],xmm5 movdqa [ebp-32],xmm6 movdqa [ebp-16],xmm7 movdqa xmm7,[32+eax] lea ebx,[128+esp] pshufd xmm0,xmm3,0 pshufd xmm1,xmm3,85 pshufd xmm2,xmm3,170 pshufd xmm3,xmm3,255 pshufd xmm4,xmm7,0 pshufd xmm5,xmm7,85 pshufd xmm6,xmm7,170 pshufd xmm7,xmm7,255 movdqa [ebp],xmm0 movdqa [16+ebp],xmm1 movdqa [32+ebp],xmm2 movdqa [48+ebp],xmm3 movdqa [ebp-128],xmm4 movdqa [ebp-112],xmm5 movdqa [ebp-96],xmm6 movdqa [ebp-80],xmm7 lea esi,[128+esi] lea edi,[128+edi] jmp NEAR L$009outer_loop align 16 L$009outer_loop: movdqa xmm1,[ebp-112] movdqa xmm2,[ebp-96] movdqa xmm3,[ebp-80] movdqa xmm5,[ebp-48] movdqa xmm6,[ebp-32] movdqa xmm7,[ebp-16] movdqa [ebx-112],xmm1 movdqa [ebx-96],xmm2 movdqa [ebx-80],xmm3 movdqa [ebx-48],xmm5 movdqa [ebx-32],xmm6 movdqa [ebx-16],xmm7 movdqa xmm2,[32+ebp] movdqa xmm3,[48+ebp] movdqa xmm4,[64+ebp] movdqa xmm5,[80+ebp] movdqa xmm6,[96+ebp] movdqa xmm7,[112+ebp] paddd xmm4,[64+eax] movdqa [32+ebx],xmm2 movdqa [48+ebx],xmm3 movdqa [64+ebx],xmm4 movdqa [80+ebx],xmm5 movdqa [96+ebx],xmm6 movdqa [112+ebx],xmm7 movdqa [64+ebp],xmm4 movdqa xmm0,[ebp-128] movdqa xmm6,xmm4 movdqa xmm3,[ebp-64] movdqa xmm4,[ebp] movdqa xmm5,[16+ebp] mov edx,10 nop align 16 L$010loop: paddd xmm0,xmm3 movdqa xmm2,xmm3 pxor xmm6,xmm0 pshufb xmm6,[eax] paddd xmm4,xmm6 pxor xmm2,xmm4 movdqa xmm3,[ebx-48] movdqa xmm1,xmm2 pslld xmm2,12 psrld xmm1,20 por xmm2,xmm1 movdqa xmm1,[ebx-112] paddd xmm0,xmm2 movdqa xmm7,[80+ebx] pxor xmm6,xmm0 movdqa [ebx-128],xmm0 pshufb xmm6,[16+eax] paddd xmm4,xmm6 movdqa [64+ebx],xmm6 pxor xmm2,xmm4 paddd xmm1,xmm3 movdqa xmm0,xmm2 pslld xmm2,7 psrld xmm0,25 pxor xmm7,xmm1 por xmm2,xmm0 movdqa [ebx],xmm4 pshufb xmm7,[eax] movdqa [ebx-64],xmm2 paddd xmm5,xmm7 movdqa xmm4,[32+ebx] pxor xmm3,xmm5 movdqa xmm2,[ebx-32] movdqa xmm0,xmm3 pslld xmm3,12 psrld xmm0,20 por xmm3,xmm0 movdqa xmm0,[ebx-96] paddd xmm1,xmm3 movdqa xmm6,[96+ebx] pxor xmm7,xmm1 movdqa [ebx-112],xmm1 pshufb xmm7,[16+eax] paddd xmm5,xmm7 movdqa [80+ebx],xmm7 pxor xmm3,xmm5 paddd xmm0,xmm2 movdqa xmm1,xmm3 pslld xmm3,7 psrld xmm1,25 pxor xmm6,xmm0 por xmm3,xmm1 movdqa [16+ebx],xmm5 pshufb xmm6,[eax] movdqa [ebx-48],xmm3 paddd xmm4,xmm6 movdqa xmm5,[48+ebx] pxor xmm2,xmm4 movdqa xmm3,[ebx-16] movdqa xmm1,xmm2 pslld xmm2,12 psrld xmm1,20 por xmm2,xmm1 movdqa xmm1,[ebx-80] paddd xmm0,xmm2 movdqa xmm7,[112+ebx] pxor xmm6,xmm0 movdqa [ebx-96],xmm0 pshufb xmm6,[16+eax] paddd xmm4,xmm6 movdqa [96+ebx],xmm6 pxor xmm2,xmm4 paddd xmm1,xmm3 movdqa xmm0,xmm2 pslld xmm2,7 psrld xmm0,25 pxor xmm7,xmm1 por xmm2,xmm0 pshufb xmm7,[eax] movdqa [ebx-32],xmm2 paddd xmm5,xmm7 pxor xmm3,xmm5 movdqa xmm2,[ebx-48] movdqa xmm0,xmm3 pslld xmm3,12 psrld xmm0,20 por xmm3,xmm0 movdqa xmm0,[ebx-128] paddd xmm1,xmm3 pxor xmm7,xmm1 movdqa [ebx-80],xmm1 pshufb xmm7,[16+eax] paddd xmm5,xmm7 movdqa xmm6,xmm7 pxor xmm3,xmm5 paddd xmm0,xmm2 movdqa xmm1,xmm3 pslld xmm3,7 psrld xmm1,25 pxor xmm6,xmm0 por xmm3,xmm1 pshufb xmm6,[eax] movdqa [ebx-16],xmm3 paddd xmm4,xmm6 pxor xmm2,xmm4 movdqa xmm3,[ebx-32] movdqa xmm1,xmm2 pslld xmm2,12 psrld xmm1,20 por xmm2,xmm1 movdqa xmm1,[ebx-112] paddd xmm0,xmm2 movdqa xmm7,[64+ebx] pxor xmm6,xmm0 movdqa [ebx-128],xmm0 pshufb xmm6,[16+eax] paddd xmm4,xmm6 movdqa [112+ebx],xmm6 pxor xmm2,xmm4 paddd xmm1,xmm3 movdqa xmm0,xmm2 pslld xmm2,7 psrld xmm0,25 pxor xmm7,xmm1 por xmm2,xmm0 movdqa [32+ebx],xmm4 pshufb xmm7,[eax] movdqa [ebx-48],xmm2 paddd xmm5,xmm7 movdqa xmm4,[ebx] pxor xmm3,xmm5 movdqa xmm2,[ebx-16] movdqa xmm0,xmm3 pslld xmm3,12 psrld xmm0,20 por xmm3,xmm0 movdqa xmm0,[ebx-96] paddd xmm1,xmm3 movdqa xmm6,[80+ebx] pxor xmm7,xmm1 movdqa [ebx-112],xmm1 pshufb xmm7,[16+eax] paddd xmm5,xmm7 movdqa [64+ebx],xmm7 pxor xmm3,xmm5 paddd xmm0,xmm2 movdqa xmm1,xmm3 pslld xmm3,7 psrld xmm1,25 pxor xmm6,xmm0 por xmm3,xmm1 movdqa [48+ebx],xmm5 pshufb xmm6,[eax] movdqa [ebx-32],xmm3 paddd xmm4,xmm6 movdqa xmm5,[16+ebx] pxor xmm2,xmm4 movdqa xmm3,[ebx-64] movdqa xmm1,xmm2 pslld xmm2,12 psrld xmm1,20 por xmm2,xmm1 movdqa xmm1,[ebx-80] paddd xmm0,xmm2 movdqa xmm7,[96+ebx] pxor xmm6,xmm0 movdqa [ebx-96],xmm0 pshufb xmm6,[16+eax] paddd xmm4,xmm6 movdqa [80+ebx],xmm6 pxor xmm2,xmm4 paddd xmm1,xmm3 movdqa xmm0,xmm2 pslld xmm2,7 psrld xmm0,25 pxor xmm7,xmm1 por xmm2,xmm0 pshufb xmm7,[eax] movdqa [ebx-16],xmm2 paddd xmm5,xmm7 pxor xmm3,xmm5 movdqa xmm0,xmm3 pslld xmm3,12 psrld xmm0,20 por xmm3,xmm0 movdqa xmm0,[ebx-128] paddd xmm1,xmm3 movdqa xmm6,[64+ebx] pxor xmm7,xmm1 movdqa [ebx-80],xmm1 pshufb xmm7,[16+eax] paddd xmm5,xmm7 movdqa [96+ebx],xmm7 pxor xmm3,xmm5 movdqa xmm1,xmm3 pslld xmm3,7 psrld xmm1,25 por xmm3,xmm1 dec edx jnz NEAR L$010loop movdqa [ebx-64],xmm3 movdqa [ebx],xmm4 movdqa [16+ebx],xmm5 movdqa [64+ebx],xmm6 movdqa [96+ebx],xmm7 movdqa xmm1,[ebx-112] movdqa xmm2,[ebx-96] movdqa xmm3,[ebx-80] paddd xmm0,[ebp-128] paddd xmm1,[ebp-112] paddd xmm2,[ebp-96] paddd xmm3,[ebp-80] movdqa xmm6,xmm0 punpckldq xmm0,xmm1 movdqa xmm7,xmm2 punpckldq xmm2,xmm3 punpckhdq xmm6,xmm1 punpckhdq xmm7,xmm3 movdqa xmm1,xmm0 punpcklqdq xmm0,xmm2 movdqa xmm3,xmm6 punpcklqdq xmm6,xmm7 punpckhqdq xmm1,xmm2 punpckhqdq xmm3,xmm7 movdqu xmm4,[esi-128] movdqu xmm5,[esi-64] movdqu xmm2,[esi] movdqu xmm7,[64+esi] lea esi,[16+esi] pxor xmm4,xmm0 movdqa xmm0,[ebx-64] pxor xmm5,xmm1 movdqa xmm1,[ebx-48] pxor xmm6,xmm2 movdqa xmm2,[ebx-32] pxor xmm7,xmm3 movdqa xmm3,[ebx-16] movdqu [edi-128],xmm4 movdqu [edi-64],xmm5 movdqu [edi],xmm6 movdqu [64+edi],xmm7 lea edi,[16+edi] paddd xmm0,[ebp-64] paddd xmm1,[ebp-48] paddd xmm2,[ebp-32] paddd xmm3,[ebp-16] movdqa xmm6,xmm0 punpckldq xmm0,xmm1 movdqa xmm7,xmm2 punpckldq xmm2,xmm3 punpckhdq xmm6,xmm1 punpckhdq xmm7,xmm3 movdqa xmm1,xmm0 punpcklqdq xmm0,xmm2 movdqa xmm3,xmm6 punpcklqdq xmm6,xmm7 punpckhqdq xmm1,xmm2 punpckhqdq xmm3,xmm7 movdqu xmm4,[esi-128] movdqu xmm5,[esi-64] movdqu xmm2,[esi] movdqu xmm7,[64+esi] lea esi,[16+esi] pxor xmm4,xmm0 movdqa xmm0,[ebx] pxor xmm5,xmm1 movdqa xmm1,[16+ebx] pxor xmm6,xmm2 movdqa xmm2,[32+ebx] pxor xmm7,xmm3 movdqa xmm3,[48+ebx] movdqu [edi-128],xmm4 movdqu [edi-64],xmm5 movdqu [edi],xmm6 movdqu [64+edi],xmm7 lea edi,[16+edi] paddd xmm0,[ebp] paddd xmm1,[16+ebp] paddd xmm2,[32+ebp] paddd xmm3,[48+ebp] movdqa xmm6,xmm0 punpckldq xmm0,xmm1 movdqa xmm7,xmm2 punpckldq xmm2,xmm3 punpckhdq xmm6,xmm1 punpckhdq xmm7,xmm3 movdqa xmm1,xmm0 punpcklqdq xmm0,xmm2 movdqa xmm3,xmm6 punpcklqdq xmm6,xmm7 punpckhqdq xmm1,xmm2 punpckhqdq xmm3,xmm7 movdqu xmm4,[esi-128] movdqu xmm5,[esi-64] movdqu xmm2,[esi] movdqu xmm7,[64+esi] lea esi,[16+esi] pxor xmm4,xmm0 movdqa xmm0,[64+ebx] pxor xmm5,xmm1 movdqa xmm1,[80+ebx] pxor xmm6,xmm2 movdqa xmm2,[96+ebx] pxor xmm7,xmm3 movdqa xmm3,[112+ebx] movdqu [edi-128],xmm4 movdqu [edi-64],xmm5 movdqu [edi],xmm6 movdqu [64+edi],xmm7 lea edi,[16+edi] paddd xmm0,[64+ebp] paddd xmm1,[80+ebp] paddd xmm2,[96+ebp] paddd xmm3,[112+ebp] movdqa xmm6,xmm0 punpckldq xmm0,xmm1 movdqa xmm7,xmm2 punpckldq xmm2,xmm3 punpckhdq xmm6,xmm1 punpckhdq xmm7,xmm3 movdqa xmm1,xmm0 punpcklqdq xmm0,xmm2 movdqa xmm3,xmm6 punpcklqdq xmm6,xmm7 punpckhqdq xmm1,xmm2 punpckhqdq xmm3,xmm7 movdqu xmm4,[esi-128] movdqu xmm5,[esi-64] movdqu xmm2,[esi] movdqu xmm7,[64+esi] lea esi,[208+esi] pxor xmm4,xmm0 pxor xmm5,xmm1 pxor xmm6,xmm2 pxor xmm7,xmm3 movdqu [edi-128],xmm4 movdqu [edi-64],xmm5 movdqu [edi],xmm6 movdqu [64+edi],xmm7 lea edi,[208+edi] sub ecx,256 jnc NEAR L$009outer_loop add ecx,256 jz NEAR L$011done mov ebx,DWORD [520+esp] lea esi,[esi-128] mov edx,DWORD [516+esp] lea edi,[edi-128] movd xmm2,DWORD [64+ebp] movdqu xmm3,[ebx] paddd xmm2,[96+eax] pand xmm3,[112+eax] por xmm3,xmm2 L$0081x: movdqa xmm0,[32+eax] movdqu xmm1,[edx] movdqu xmm2,[16+edx] movdqa xmm6,[eax] movdqa xmm7,[16+eax] mov DWORD [48+esp],ebp movdqa [esp],xmm0 movdqa [16+esp],xmm1 movdqa [32+esp],xmm2 movdqa [48+esp],xmm3 mov edx,10 jmp NEAR L$012loop1x align 16 L$013outer1x: movdqa xmm3,[80+eax] movdqa xmm0,[esp] movdqa xmm1,[16+esp] movdqa xmm2,[32+esp] paddd xmm3,[48+esp] mov edx,10 movdqa [48+esp],xmm3 jmp NEAR L$012loop1x align 16 L$012loop1x: paddd xmm0,xmm1 pxor xmm3,xmm0 db 102,15,56,0,222 paddd xmm2,xmm3 pxor xmm1,xmm2 movdqa xmm4,xmm1 psrld xmm1,20 pslld xmm4,12 por xmm1,xmm4 paddd xmm0,xmm1 pxor xmm3,xmm0 db 102,15,56,0,223 paddd xmm2,xmm3 pxor xmm1,xmm2 movdqa xmm4,xmm1 psrld xmm1,25 pslld xmm4,7 por xmm1,xmm4 pshufd xmm2,xmm2,78 pshufd xmm1,xmm1,57 pshufd xmm3,xmm3,147 nop paddd xmm0,xmm1 pxor xmm3,xmm0 db 102,15,56,0,222 paddd xmm2,xmm3 pxor xmm1,xmm2 movdqa xmm4,xmm1 psrld xmm1,20 pslld xmm4,12 por xmm1,xmm4 paddd xmm0,xmm1 pxor xmm3,xmm0 db 102,15,56,0,223 paddd xmm2,xmm3 pxor xmm1,xmm2 movdqa xmm4,xmm1 psrld xmm1,25 pslld xmm4,7 por xmm1,xmm4 pshufd xmm2,xmm2,78 pshufd xmm1,xmm1,147 pshufd xmm3,xmm3,57 dec edx jnz NEAR L$012loop1x paddd xmm0,[esp] paddd xmm1,[16+esp] paddd xmm2,[32+esp] paddd xmm3,[48+esp] cmp ecx,64 jb NEAR L$014tail movdqu xmm4,[esi] movdqu xmm5,[16+esi] pxor xmm0,xmm4 movdqu xmm4,[32+esi] pxor xmm1,xmm5 movdqu xmm5,[48+esi] pxor xmm2,xmm4 pxor xmm3,xmm5 lea esi,[64+esi] movdqu [edi],xmm0 movdqu [16+edi],xmm1 movdqu [32+edi],xmm2 movdqu [48+edi],xmm3 lea edi,[64+edi] sub ecx,64 jnz NEAR L$013outer1x jmp NEAR L$011done L$014tail: movdqa [esp],xmm0 movdqa [16+esp],xmm1 movdqa [32+esp],xmm2 movdqa [48+esp],xmm3 xor eax,eax xor edx,edx xor ebp,ebp L$015tail_loop: mov al,BYTE [ebp*1+esp] mov dl,BYTE [ebp*1+esi] lea ebp,[1+ebp] xor al,dl mov BYTE [ebp*1+edi-1],al dec ecx jnz NEAR L$015tail_loop L$011done: mov esp,DWORD [512+esp] pop edi pop esi pop ebx pop ebp ret align 64 L$ssse3_data: db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 dd 1634760805,857760878,2036477234,1797285236 dd 0,1,2,3 dd 4,4,4,4 dd 1,0,0,0 dd 4,0,0,0 dd 0,-1,-1,-1 align 64 db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 db 114,103,62,0 segment .bss common _OPENSSL_ia32cap_P 16 %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret %endif