#include "namespace.h"

#define vec_mul_asm CRYPTO_NAMESPACE(vec_mul_asm)
#define _vec_mul_asm _CRYPTO_NAMESPACE(vec_mul_asm)

# qhasm: int64 input_0
# qhasm: int64 input_1
# qhasm: int64 input_2
# qhasm: int64 input_3
# qhasm: int64 input_4
# qhasm: int64 input_5
# qhasm: stack64 input_6
# qhasm: stack64 input_7
# qhasm: int64 caller_r11
# qhasm: int64 caller_r12
# qhasm: int64 caller_r13
# qhasm: int64 caller_r14
# qhasm: int64 caller_r15
# qhasm: int64 caller_rbx
# qhasm: int64 caller_rbp
# qhasm: reg256 s0
# qhasm: reg256 s1
# qhasm: reg256 s2
# qhasm: reg256 s3
# qhasm: reg256 s4
# qhasm: reg256 s5
# qhasm: reg256 t0
# qhasm: reg256 t1
# qhasm: reg256 t2
# qhasm: reg256 b0
# qhasm: reg256 b1
# qhasm: reg256 b2
# qhasm: reg256 b3
# qhasm: reg256 b4
# qhasm: reg256 b5
# qhasm: reg256 a0
# qhasm: reg256 a1
# qhasm: reg256 a2
# qhasm: reg256 a3
# qhasm: reg256 a4
# qhasm: reg256 a5
# qhasm: reg256 r0
# qhasm: reg256 r1
# qhasm: reg256 r2
# qhasm: reg256 r3
# qhasm: reg256 r4
# qhasm: reg256 r5
# qhasm: reg256 r6
# qhasm: reg256 r7
# qhasm: reg256 r8
# qhasm: reg256 r9
# qhasm: reg256 r10
# qhasm: reg256 r11
# qhasm: reg256 r12
# qhasm: reg256 r13
# qhasm: reg256 r14
# qhasm: reg256 r15
# qhasm: reg256 r16
# qhasm: reg256 r17
# qhasm: reg256 r18
# qhasm: reg256 r19
# qhasm: reg256 r20
# qhasm: reg256 r21
# qhasm: reg256 r22
# qhasm: reg256 r
# qhasm: int64 h0
# qhasm: int64 h1
# qhasm: int64 h2
# qhasm: int64 h3
# qhasm: int64 h4
# qhasm: int64 h5
# qhasm: int64 h6
# qhasm: int64 h7
# qhasm: int64 h8
# qhasm: int64 h9
# qhasm: int64 h10
# qhasm: int64 h11
# qhasm: int64 h12
# qhasm: int64 h13
# qhasm: int64 h14
# qhasm: int64 h15
# qhasm: int64 h16
# qhasm: int64 h17
# qhasm: int64 h18
# qhasm: int64 h19
# qhasm: int64 h20
# qhasm: int64 h21
# qhasm: int64 h22
# qhasm: stack4864 buf
# qhasm: int64 ptr
# qhasm: int64 tmp
# qhasm: stack64 r11_stack
# qhasm: stack64 r12_stack
# qhasm: stack64 r13_stack
# qhasm: stack64 r14_stack
# qhasm: stack64 r15_stack
# qhasm: stack64 rbx_stack
# qhasm: stack64 rbp_stack

# qhasm: enter vec_mul_asm
.p2align 5
.global _vec_mul_asm
.global vec_mul_asm
_vec_mul_asm:
vec_mul_asm:
mov %rsp,%r11
and $31,%r11
add $672,%r11
sub %r11,%rsp

# qhasm: r11_stack = caller_r11
movq %r11,608(%rsp)

# qhasm: r12_stack = caller_r12
movq %r12,616(%rsp)

# qhasm: r13_stack = caller_r13
movq %r13,624(%rsp)

# qhasm: r14_stack = caller_r14
movq %r14,632(%rsp)

# qhasm: r15_stack = caller_r15
movq %r15,640(%rsp)

# qhasm: rbx_stack = caller_rbx
movq %rbx,648(%rsp)

# qhasm: ptr = &buf
leaq 0(%rsp),%rcx
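# vec_mul_asm computes a bitsliced multiplication in GF(2^12) =
# GF(2)[x]/(x^12 + x^3 + 1). Judging from the register use below, %rdi
# (input_0) points at 12 output limbs and %rsi/%rdx (input_1/input_2) each
# point at 12 input limbs; limb k holds bit k of 64 independent field
# elements, so one 64-bit AND multiplies 64 coefficient pairs at once and one
# XOR adds them. A single schoolbook step in this bitsliced view (a hedged C
# sketch; the names are illustrative only):
#
#   prod[i + j] ^= f[i] & g[j];   /* 64 parallel GF(2) multiply-accumulates */
#
# The prologue above reserves a 32-byte-aligned 672-byte frame: 608 bytes for
# buf (the partial-product buffer, written as ymm words) plus the spill slots
# at 608..655 for the saved registers.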
# qhasm: s0 = mem256[ input_1 + 0 ]
vmovupd 0(%rsi),%ymm0

# qhasm: s1 = mem256[ input_1 + 32 ]
vmovupd 32(%rsi),%ymm1

# qhasm: s2 = mem256[ input_1 + 64 ]
vmovupd 64(%rsi),%ymm2

# qhasm: t0 = mem256[ input_2 + 0 ]
vmovupd 0(%rdx),%ymm3

# qhasm: t1 = mem256[ input_2 + 32 ]
vmovupd 32(%rdx),%ymm4

# qhasm: t2 = mem256[ input_2 + 64 ]
vmovupd 64(%rdx),%ymm5
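# Each ymm register packs four 64-bit limbs. The multiply below broadcasts
# limb pairs with vpermq so that a single vpand yields four partial products:
#
#   a_i = [ s(2i), s(2i), s(2i+1), s(2i+1) ]   ($0x50 picks lanes [0,0,1,1],
#                                               $0xfa picks [2,2,3,3])
#   b_j = [ t(2j), t(2j+1), t(2j), t(2j+1) ]   ($0x44 picks [0,1,0,1],
#                                               $0xee picks [2,3,2,3])
#   a_i & b_j = [ s(2i)&t(2j), s(2i)&t(2j+1), s(2i+1)&t(2j), s(2i+1)&t(2j+1) ]
#
# r(i+j) accumulates these lane products; once no later row can touch r_k,
# it is spilled to buf at ptr + 32*k.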
# qhasm: a5[0,1,2,3] = s2[2,2,3,3]
vpermq $0xfa,%ymm2,%ymm6

# qhasm: b5[0,1,2,3] = t2[2,3,2,3]
vpermq $0xee,%ymm5,%ymm7

# qhasm: r10 = a5 & b5
vpand %ymm6,%ymm7,%ymm8

# qhasm: mem256[ ptr + 320 ] = r10
vmovupd %ymm8,320(%rcx)

# qhasm: b4[0,1,2,3] = t2[0,1,0,1]
vpermq $0x44,%ymm5,%ymm5

# qhasm: r9 = a5 & b4
vpand %ymm6,%ymm5,%ymm8

# qhasm: b3[0,1,2,3] = t1[2,3,2,3]
vpermq $0xee,%ymm4,%ymm9

# qhasm: r8 = a5 & b3
vpand %ymm6,%ymm9,%ymm10

# qhasm: b2[0,1,2,3] = t1[0,1,0,1]
vpermq $0x44,%ymm4,%ymm4

# qhasm: r7 = a5 & b2
vpand %ymm6,%ymm4,%ymm11

# qhasm: b1[0,1,2,3] = t0[2,3,2,3]
vpermq $0xee,%ymm3,%ymm12

# qhasm: r6 = a5 & b1
vpand %ymm6,%ymm12,%ymm13

# qhasm: b0[0,1,2,3] = t0[0,1,0,1]
vpermq $0x44,%ymm3,%ymm3

# qhasm: r5 = a5 & b0
vpand %ymm6,%ymm3,%ymm6

# qhasm: a4[0,1,2,3] = s2[0,0,1,1]
vpermq $0x50,%ymm2,%ymm2

# qhasm: r = a4 & b5
vpand %ymm2,%ymm7,%ymm14

# qhasm: r9 ^= r
vpxor %ymm14,%ymm8,%ymm8

# qhasm: mem256[ ptr + 288 ] = r9
vmovupd %ymm8,288(%rcx)

# qhasm: r = a4 & b4
vpand %ymm2,%ymm5,%ymm8

# qhasm: r8 ^= r
vpxor %ymm8,%ymm10,%ymm10

# qhasm: r = a4 & b3
vpand %ymm2,%ymm9,%ymm8

# qhasm: r7 ^= r
vpxor %ymm8,%ymm11,%ymm11

# qhasm: r = a4 & b2
vpand %ymm2,%ymm4,%ymm8

# qhasm: r6 ^= r
vpxor %ymm8,%ymm13,%ymm13

# qhasm: r = a4 & b1
vpand %ymm2,%ymm12,%ymm8

# qhasm: r5 ^= r
vpxor %ymm8,%ymm6,%ymm6

# qhasm: r4 = a4 & b0
vpand %ymm2,%ymm3,%ymm2

# qhasm: a3[0,1,2,3] = s1[2,2,3,3]
vpermq $0xfa,%ymm1,%ymm8

# qhasm: r = a3 & b5
vpand %ymm8,%ymm7,%ymm14

# qhasm: r8 ^= r
vpxor %ymm14,%ymm10,%ymm10

# qhasm: mem256[ ptr + 256 ] = r8
vmovupd %ymm10,256(%rcx)

# qhasm: r = a3 & b4
vpand %ymm8,%ymm5,%ymm10

# qhasm: r7 ^= r
vpxor %ymm10,%ymm11,%ymm11

# qhasm: r = a3 & b3
vpand %ymm8,%ymm9,%ymm10

# qhasm: r6 ^= r
vpxor %ymm10,%ymm13,%ymm13

# qhasm: r = a3 & b2
vpand %ymm8,%ymm4,%ymm10

# qhasm: r5 ^= r
vpxor %ymm10,%ymm6,%ymm6

# qhasm: r = a3 & b1
vpand %ymm8,%ymm12,%ymm10

# qhasm: r4 ^= r
vpxor %ymm10,%ymm2,%ymm2

# qhasm: r3 = a3 & b0
vpand %ymm8,%ymm3,%ymm8

# qhasm: a2[0,1,2,3] = s1[0,0,1,1]
vpermq $0x50,%ymm1,%ymm1

# qhasm: r = a2 & b5
vpand %ymm1,%ymm7,%ymm10

# qhasm: r7 ^= r
vpxor %ymm10,%ymm11,%ymm11

# qhasm: mem256[ ptr + 224 ] = r7
vmovupd %ymm11,224(%rcx)

# qhasm: r = a2 & b4
vpand %ymm1,%ymm5,%ymm10

# qhasm: r6 ^= r
vpxor %ymm10,%ymm13,%ymm13

# qhasm: r = a2 & b3
vpand %ymm1,%ymm9,%ymm10

# qhasm: r5 ^= r
vpxor %ymm10,%ymm6,%ymm6

# qhasm: r = a2 & b2
vpand %ymm1,%ymm4,%ymm10

# qhasm: r4 ^= r
vpxor %ymm10,%ymm2,%ymm2

# qhasm: r = a2 & b1
vpand %ymm1,%ymm12,%ymm10

# qhasm: r3 ^= r
vpxor %ymm10,%ymm8,%ymm8

# qhasm: r2 = a2 & b0
vpand %ymm1,%ymm3,%ymm1

# qhasm: a1[0,1,2,3] = s0[2,2,3,3]
vpermq $0xfa,%ymm0,%ymm10

# qhasm: r = a1 & b5
vpand %ymm10,%ymm7,%ymm11

# qhasm: r6 ^= r
vpxor %ymm11,%ymm13,%ymm13

# qhasm: mem256[ ptr + 192 ] = r6
vmovupd %ymm13,192(%rcx)

# qhasm: r = a1 & b4
vpand %ymm10,%ymm5,%ymm11

# qhasm: r5 ^= r
vpxor %ymm11,%ymm6,%ymm6

# qhasm: r = a1 & b3
vpand %ymm10,%ymm9,%ymm11

# qhasm: r4 ^= r
vpxor %ymm11,%ymm2,%ymm2

# qhasm: r = a1 & b2
vpand %ymm10,%ymm4,%ymm11

# qhasm: r3 ^= r
vpxor %ymm11,%ymm8,%ymm8

# qhasm: r = a1 & b1
vpand %ymm10,%ymm12,%ymm11

# qhasm: r2 ^= r
vpxor %ymm11,%ymm1,%ymm1

# qhasm: r1 = a1 & b0
vpand %ymm10,%ymm3,%ymm10

# qhasm: a0[0,1,2,3] = s0[0,0,1,1]
vpermq $0x50,%ymm0,%ymm0

# qhasm: r = a0 & b5
vpand %ymm0,%ymm7,%ymm7

# qhasm: r5 ^= r
vpxor %ymm7,%ymm6,%ymm6

# qhasm: mem256[ ptr + 160 ] = r5
vmovupd %ymm6,160(%rcx)

# qhasm: r = a0 & b4
vpand %ymm0,%ymm5,%ymm5

# qhasm: r4 ^= r
vpxor %ymm5,%ymm2,%ymm2

# qhasm: r = a0 & b3
vpand %ymm0,%ymm9,%ymm5

# qhasm: r3 ^= r
vpxor %ymm5,%ymm8,%ymm8

# qhasm: r = a0 & b2
vpand %ymm0,%ymm4,%ymm4

# qhasm: r2 ^= r
vpxor %ymm4,%ymm1,%ymm1

# qhasm: r = a0 & b1
vpand %ymm0,%ymm12,%ymm4

# qhasm: r1 ^= r
vpxor %ymm4,%ymm10,%ymm10

# qhasm: r0 = a0 & b0
vpand %ymm0,%ymm3,%ymm0

# qhasm: mem256[ ptr + 128 ] = r4
vmovupd %ymm2,128(%rcx)

# qhasm: mem256[ ptr + 96 ] = r3
vmovupd %ymm8,96(%rcx)

# qhasm: mem256[ ptr + 64 ] = r2
vmovupd %ymm1,64(%rcx)

# qhasm: mem256[ ptr + 32 ] = r1
vmovupd %ymm10,32(%rcx)

# qhasm: mem256[ ptr + 0 ] = r0
vmovupd %ymm0,0(%rcx)
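# buf now holds r0..r10 at a 32-byte stride. The four lanes of r_k contribute
# to product limbs [ h(2k), h(2k+1), h(2k+1), h(2k+2) ], so
#
#   h(2k+1) = buf[32k+8]  ^ buf[32k+16]
#   h(2k)   = buf[32k-8]  ^ buf[32k]
#
# which matches the 64-bit load/xor offsets below. The high limbs h22..h12
# are folded into the low ones on the fly using x^12 = x^3 + 1, i.e. limb i
# contributes to limbs i-9 and i-12 (a mov seeds a fold target the first
# time it is written, xor thereafter). In C this reduction would read:
#
#   for (i = 22; i >= 12; i--) { h[i - 9] ^= h[i]; h[i - 12] ^= h[i]; }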
# qhasm: h22 = mem64[ ptr + 344 ]
movq 344(%rcx),%rsi

# qhasm: h13 = h22
mov %rsi,%rdx

# qhasm: h10 = h22
mov %rsi,%rsi

# qhasm: h21 = mem64[ ptr + 336 ]
movq 336(%rcx),%r8

# qhasm: h21 ^= *(uint64 *) ( ptr + 328 )
xorq 328(%rcx),%r8

# qhasm: h12 = h21
mov %r8,%r9

# qhasm: h9 = h21
mov %r8,%r8

# qhasm: h20 = mem64[ ptr + 312 ]
movq 312(%rcx),%rax

# qhasm: h20 ^= *(uint64 *) ( ptr + 320 )
xorq 320(%rcx),%rax

# qhasm: h11 = h20
mov %rax,%r10

# qhasm: h8 = h20
mov %rax,%rax

# qhasm: h19 = mem64[ ptr + 304 ]
movq 304(%rcx),%r11

# qhasm: h19 ^= *(uint64 *) ( ptr + 296 )
xorq 296(%rcx),%r11

# qhasm: h10 ^= h19
xor %r11,%rsi

# qhasm: h7 = h19
mov %r11,%r11

# qhasm: h18 = mem64[ ptr + 280 ]
movq 280(%rcx),%r12

# qhasm: h18 ^= *(uint64 *) ( ptr + 288 )
xorq 288(%rcx),%r12

# qhasm: h9 ^= h18
xor %r12,%r8

# qhasm: h6 = h18
mov %r12,%r12

# qhasm: h17 = mem64[ ptr + 272 ]
movq 272(%rcx),%r13

# qhasm: h17 ^= *(uint64 *) ( ptr + 264 )
xorq 264(%rcx),%r13

# qhasm: h8 ^= h17
xor %r13,%rax

# qhasm: h5 = h17
mov %r13,%r13

# qhasm: h16 = mem64[ ptr + 248 ]
movq 248(%rcx),%r14

# qhasm: h16 ^= *(uint64 *) ( ptr + 256 )
xorq 256(%rcx),%r14

# qhasm: h7 ^= h16
xor %r14,%r11

# qhasm: h4 = h16
mov %r14,%r14

# qhasm: h15 = mem64[ ptr + 240 ]
movq 240(%rcx),%r15

# qhasm: h15 ^= *(uint64 *) ( ptr + 232 )
xorq 232(%rcx),%r15

# qhasm: h6 ^= h15
xor %r15,%r12

# qhasm: h3 = h15
mov %r15,%r15

# qhasm: h14 = mem64[ ptr + 216 ]
movq 216(%rcx),%rbx

# qhasm: h14 ^= *(uint64 *) ( ptr + 224 )
xorq 224(%rcx),%rbx

# qhasm: h5 ^= h14
xor %rbx,%r13

# qhasm: h2 = h14
mov %rbx,%rbx

# qhasm: h13 ^= *(uint64 *) ( ptr + 208 )
xorq 208(%rcx),%rdx

# qhasm: h13 ^= *(uint64 *) ( ptr + 200 )
xorq 200(%rcx),%rdx

# qhasm: h4 ^= h13
xor %rdx,%r14

# qhasm: h1 = h13
mov %rdx,%rdx

# qhasm: h12 ^= *(uint64 *) ( ptr + 184 )
xorq 184(%rcx),%r9

# qhasm: h12 ^= *(uint64 *) ( ptr + 192 )
xorq 192(%rcx),%r9

# qhasm: h3 ^= h12
xor %r9,%r15

# qhasm: h0 = h12
mov %r9,%r9

# qhasm: h11 ^= *(uint64 *) ( ptr + 176 )
xorq 176(%rcx),%r10

# qhasm: h11 ^= *(uint64 *) ( ptr + 168 )
xorq 168(%rcx),%r10

# qhasm: h10 ^= *(uint64 *) ( ptr + 152 )
xorq 152(%rcx),%rsi

# qhasm: h10 ^= *(uint64 *) ( ptr + 160 )
xorq 160(%rcx),%rsi

# qhasm: h9 ^= *(uint64 *) ( ptr + 144 )
xorq 144(%rcx),%r8

# qhasm: h9 ^= *(uint64 *) ( ptr + 136 )
xorq 136(%rcx),%r8

# qhasm: h8 ^= *(uint64 *) ( ptr + 120 )
xorq 120(%rcx),%rax

# qhasm: h8 ^= *(uint64 *) ( ptr + 128 )
xorq 128(%rcx),%rax

# qhasm: h7 ^= *(uint64 *) ( ptr + 112 )
xorq 112(%rcx),%r11

# qhasm: h7 ^= *(uint64 *) ( ptr + 104 )
xorq 104(%rcx),%r11

# qhasm: h6 ^= *(uint64 *) ( ptr + 88 )
xorq 88(%rcx),%r12

# qhasm: h6 ^= *(uint64 *) ( ptr + 96 )
xorq 96(%rcx),%r12

# qhasm: h5 ^= *(uint64 *) ( ptr + 80 )
xorq 80(%rcx),%r13

# qhasm: h5 ^= *(uint64 *) ( ptr + 72 )
xorq 72(%rcx),%r13

# qhasm: h4 ^= *(uint64 *) ( ptr + 56 )
xorq 56(%rcx),%r14

# qhasm: h4 ^= *(uint64 *) ( ptr + 64 )
xorq 64(%rcx),%r14

# qhasm: h3 ^= *(uint64 *) ( ptr + 48 )
xorq 48(%rcx),%r15

# qhasm: h3 ^= *(uint64 *) ( ptr + 40 )
xorq 40(%rcx),%r15

# qhasm: h2 ^= *(uint64 *) ( ptr + 24 )
xorq 24(%rcx),%rbx

# qhasm: h2 ^= *(uint64 *) ( ptr + 32 )
xorq 32(%rcx),%rbx

# qhasm: h1 ^= *(uint64 *) ( ptr + 16 )
xorq 16(%rcx),%rdx

# qhasm: h1 ^= *(uint64 *) ( ptr + 8 )
xorq 8(%rcx),%rdx

# qhasm: h0 ^= *(uint64 *) ( ptr + 0 )
xorq 0(%rcx),%r9

# qhasm: mem64[ input_0 + 0 ] = h0
movq %r9,0(%rdi)

# qhasm: mem64[ input_0 + 8 ] = h1
movq %rdx,8(%rdi)

# qhasm: mem64[ input_0 + 16 ] = h2
movq %rbx,16(%rdi)

# qhasm: mem64[ input_0 + 24 ] = h3
movq %r15,24(%rdi)

# qhasm: mem64[ input_0 + 32 ] = h4
movq %r14,32(%rdi)

# qhasm: mem64[ input_0 + 40 ] = h5
movq %r13,40(%rdi)

# qhasm: mem64[ input_0 + 48 ] = h6
movq %r12,48(%rdi)

# qhasm: mem64[ input_0 + 56 ] = h7
movq %r11,56(%rdi)

# qhasm: mem64[ input_0 + 64 ] = h8
movq %rax,64(%rdi)

# qhasm: mem64[ input_0 + 72 ] = h9
movq %r8,72(%rdi)

# qhasm: mem64[ input_0 + 80 ] = h10
movq %rsi,80(%rdi)

# qhasm: mem64[ input_0 + 88 ] = h11
movq %r10,88(%rdi)

# qhasm: caller_r11 = r11_stack
movq 608(%rsp),%r11

# qhasm: caller_r12 = r12_stack
movq 616(%rsp),%r12

# qhasm: caller_r13 = r13_stack
movq 624(%rsp),%r13

# qhasm: caller_r14 = r14_stack
movq 632(%rsp),%r14

# qhasm: caller_r15 = r15_stack
movq 640(%rsp),%r15

# qhasm: caller_rbx = rbx_stack
movq 648(%rsp),%rbx

# qhasm: return
add %r11,%rsp
ret
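# For cross-checking, a minimal self-contained C model of this routine
# (a hedged sketch, not taken from this repository; it assumes 12 limbs per
# operand and the reduction polynomial x^12 + x^3 + 1 used above):
#
#   #include <stdint.h>
#
#   void vec_mul_ref(uint64_t h[12], const uint64_t f[12], const uint64_t g[12])
#   {
#       uint64_t buf[23] = {0};
#       int i, j;
#
#       for (i = 0; i < 12; i++)          /* bitsliced schoolbook product */
#           for (j = 0; j < 12; j++)
#               buf[i + j] ^= f[i] & g[j];
#
#       for (i = 22; i >= 12; i--) {      /* fold high limbs: x^12 = x^3 + 1 */
#           buf[i - 9]  ^= buf[i];
#           buf[i - 12] ^= buf[i];
#       }
#
#       for (i = 0; i < 12; i++)
#           h[i] = buf[i];
#   }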