// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
#ifndef MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX
.text
.globl	gcm_init_avx512
.hidden gcm_init_avx512
.hidden	gcm_init_avx512
.type	gcm_init_avx512,@function
.align	32
gcm_init_avx512:
.cfi_startproc
.byte	243,15,30,250
	vmovdqu64	(%rsi),%xmm16
	vpalignr	$8,%xmm16,%xmm16,%xmm16
	vmovdqa64	%xmm16,%xmm2
	vpsllq	$1,%xmm16,%xmm16
	vpsrlq	$63,%xmm2,%xmm2
	vmovdqa	%xmm2,%xmm1
	vpslldq	$8,%xmm2,%xmm2
	vpsrldq	$8,%xmm1,%xmm1
	vporq	%xmm2,%xmm16,%xmm16
	vpshufd	$36,%xmm1,%xmm2
	vpcmpeqd	TWOONE(%rip),%xmm2,%xmm2
	vpand	POLY(%rip),%xmm2,%xmm2
	vpxorq	%xmm2,%xmm16,%xmm16
	vmovdqu64	%xmm16,240(%rdi)
	vshufi32x4	$0x00,%ymm16,%ymm16,%ymm4
	vmovdqa	%ymm4,%ymm3
.byte	98,243,101,40,68,196,17
.byte	98,243,101,40,68,204,0
.byte	98,243,101,40,68,212,1
.byte	98,243,101,40,68,220,16
	vpxorq	%ymm2,%ymm3,%ymm3
	vpsrldq	$8,%ymm3,%ymm2
	vpslldq	$8,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm3,%ymm3
	vmovdqu64	POLY2(%rip),%ymm2
.byte	98,243,109,40,68,203,1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm3,%ymm3
.byte	98,243,109,40,68,203,0
	vpsrldq	$4,%ymm1,%ymm1
.byte	98,243,109,40,68,219,16
	vpslldq	$4,%ymm3,%ymm3
	vpternlogq	$0x96,%ymm1,%ymm0,%ymm3
	vmovdqu64	%xmm3,224(%rdi)
	vinserti64x2	$1,%xmm16,%ymm3,%ymm4
	vmovdqa64	%ymm4,%ymm5
.byte	98,243,93,40,68,195,17
.byte	98,243,93,40,68,203,0
.byte	98,243,93,40,68,211,1
.byte	98,243,93,40,68,227,16
	vpxorq	%ymm2,%ymm4,%ymm4
	vpsrldq	$8,%ymm4,%ymm2
	vpslldq	$8,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm4,%ymm4
	vmovdqu64	POLY2(%rip),%ymm2
.byte	98,243,109,40,68,204,1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm4,%ymm4
.byte	98,243,109,40,68,204,0
	vpsrldq	$4,%ymm1,%ymm1
.byte	98,243,109,40,68,228,16
	vpslldq	$4,%ymm4,%ymm4
	vpternlogq	$0x96,%ymm1,%ymm0,%ymm4
	vmovdqu64	%ymm4,192(%rdi)
	vinserti64x4	$1,%ymm5,%zmm4,%zmm4
	vshufi64x2	$0x00,%zmm4,%zmm4,%zmm3
	vmovdqa64	%zmm4,%zmm5
.byte	98,243,93,72,68,195,17
.byte	98,243,93,72,68,203,0
.byte	98,243,93,72,68,211,1
.byte	98,243,93,72,68,227,16
	vpxorq	%zmm2,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm2
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm4,%zmm4
	vmovdqu64	POLY2(%rip),%zmm2
.byte	98,243,109,72,68,204,1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm4,%zmm4
.byte	98,243,109,72,68,204,0
	vpsrldq	$4,%zmm1,%zmm1
.byte	98,243,109,72,68,228,16
	vpslldq	$4,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm1,%zmm0,%zmm4
	vmovdqu64	%zmm4,128(%rdi)
	vshufi64x2	$0x00,%zmm4,%zmm4,%zmm3
.byte	98,243,85,72,68,195,17
.byte	98,243,85,72,68,203,0
.byte	98,243,85,72,68,211,1
.byte	98,243,85,72,68,235,16
	vpxorq	%zmm2,%zmm5,%zmm5
	vpsrldq	$8,%zmm5,%zmm2
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm5,%zmm5
	vmovdqu64	POLY2(%rip),%zmm2
.byte	98,243,109,72,68,205,1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm5,%zmm5
.byte	98,243,109,72,68,205,0
	vpsrldq	$4,%zmm1,%zmm1
.byte	98,243,109,72,68,237,16
	vpslldq	$4,%zmm5,%zmm5
	vpternlogq	$0x96,%zmm1,%zmm0,%zmm5
	vmovdqu64	%zmm5,64(%rdi)
.byte	98,243,93,72,68,195,17
.byte	98,243,93,72,68,203,0
.byte	98,243,93,72,68,211,1
.byte	98,243,93,72,68,227,16
	vpxorq	%zmm2,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm2
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm4,%zmm4
	vmovdqu64	POLY2(%rip),%zmm2
.byte	98,243,109,72,68,204,1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm4,%zmm4
.byte	98,243,109,72,68,204,0
	vpsrldq	$4,%zmm1,%zmm1
.byte	98,243,109,72,68,228,16
	vpslldq	$4,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm1,%zmm0,%zmm4
	vmovdqu64	%zmm4,0(%rdi)
	vzeroupper
.Lexit_init:
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_avx512, .-gcm_init_avx512
.globl	gcm_gmult_avx512
.hidden gcm_gmult_avx512
.hidden	gcm_gmult_avx512
.type	gcm_gmult_avx512,@function
.align	32
gcm_gmult_avx512:
.cfi_startproc
.byte	243,15,30,250
	vmovdqu64	(%rdi),%xmm1
	vpshufb	SHUF_MASK(%rip),%xmm1,%xmm1
	vmovdqu64	240(%rsi),%xmm2
.byte	98,243,117,8,68,218,17
.byte	98,243,117,8,68,226,0
.byte	98,243,117,8,68,234,1
.byte	98,243,117,8,68,202,16
	vpxorq	%xmm5,%xmm1,%xmm1
	vpsrldq	$8,%xmm1,%xmm5
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm4,%xmm1,%xmm1
	vmovdqu64	POLY2(%rip),%xmm5
.byte	98,243,85,8,68,225,1
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm1,%xmm1
.byte	98,243,85,8,68,225,0
	vpsrldq	$4,%xmm4,%xmm4
.byte	98,243,85,8,68,201,16
	vpslldq	$4,%xmm1,%xmm1
	vpternlogq	$0x96,%xmm4,%xmm3,%xmm1
	vpshufb	SHUF_MASK(%rip),%xmm1,%xmm1
	vmovdqu64	%xmm1,(%rdi)
	vzeroupper
.Lexit_gmult:
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_gmult_avx512, .-gcm_gmult_avx512
.globl	gcm_ghash_avx512
.hidden gcm_ghash_avx512
.hidden	gcm_ghash_avx512
.type	gcm_ghash_avx512,@function
.align	32
gcm_ghash_avx512:
.cfi_startproc
.Lghash_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lghash_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lghash_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lghash_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lghash_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lghash_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lghash_seh_push_r15:
	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lghash_seh_setfp:
.Lghash_seh_prolog_end:
	subq	$820,%rsp
	andq	$(-64),%rsp
	vmovdqu64	(%rdi),%xmm14
	vpshufb	SHUF_MASK(%rip),%xmm14,%xmm14
	movq	%rdx,%r10
	movq	%rcx,%r11
	orq	%r11,%r11
	jz	.L_CALC_AAD_done_hEgxyDlCngwrfFe
	xorq	%rbx,%rbx
	vmovdqa64	SHUF_MASK(%rip),%zmm16
.L_get_AAD_loop48x16_hEgxyDlCngwrfFe:
	cmpq	$768,%r11
	jl	.L_exit_AAD_loop48x16_hEgxyDlCngwrfFe
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_amivrujEyduiFoi
	vmovdqu64	192(%rsi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)
	vmovdqu64	128(%rsi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)
	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9
	vmovdqu64	64(%rsi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)
	vmovdqu64	0(%rsi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,448(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,384(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,320(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,256(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,192(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,128(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,64(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,0(%rsp)
.L_skip_hkeys_precomputation_amivrujEyduiFoi:
	movq	$1,%rbx
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	0(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	64(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	192(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	256(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	320(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	448(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	512(%r10),%zmm11
	vmovdqu64	576(%r10),%zmm3
	vmovdqu64	640(%r10),%zmm4
	vmovdqu64	704(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	576(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	704(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10
.byte	98,211,45,8,68,200,1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1
.byte	98,115,45,8,68,201,0
	vpsrldq	$4,%xmm9,%xmm9
.byte	98,115,45,8,68,241,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14
	subq	$768,%r11
	je	.L_CALC_AAD_done_hEgxyDlCngwrfFe
	addq	$768,%r10
	jmp	.L_get_AAD_loop48x16_hEgxyDlCngwrfFe
.L_exit_AAD_loop48x16_hEgxyDlCngwrfFe:
	cmpq	$512,%r11
	jl	.L_less_than_32x16_hEgxyDlCngwrfFe
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_wcpqaDvsGlbjGoe
	vmovdqu64	192(%rsi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)
	vmovdqu64	128(%rsi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)
	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9
	vmovdqu64	64(%rsi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)
	vmovdqu64	0(%rsi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,448(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,384(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,320(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,256(%rsp)
.L_skip_hkeys_precomputation_wcpqaDvsGlbjGoe:
	movq	$1,%rbx
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	256(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	320(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	448(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	576(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	704(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10
.byte	98,211,45,8,68,200,1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1
.byte	98,115,45,8,68,201,0
	vpsrldq	$4,%xmm9,%xmm9
.byte	98,115,45,8,68,241,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14
	subq	$512,%r11
	je	.L_CALC_AAD_done_hEgxyDlCngwrfFe
	addq	$512,%r10
	jmp	.L_less_than_16x16_hEgxyDlCngwrfFe
.L_less_than_32x16_hEgxyDlCngwrfFe:
	cmpq	$256,%r11
	jl	.L_less_than_16x16_hEgxyDlCngwrfFe
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	0(%rsi),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	64(%rsi),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsi),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	192(%rsi),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10
.byte	98,211,45,8,68,200,1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1
.byte	98,115,45,8,68,201,0
	vpsrldq	$4,%xmm9,%xmm9
.byte	98,115,45,8,68,241,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14
	subq	$256,%r11
	je	.L_CALC_AAD_done_hEgxyDlCngwrfFe
	addq	$256,%r10
.L_less_than_16x16_hEgxyDlCngwrfFe:
	leaq	byte64_len_to_mask_table(%rip),%r12
	leaq	(%r12,%r11,8),%r12
	addl	$15,%r11d
	shrl	$4,%r11d
	cmpl	$2,%r11d
	jb	.L_AAD_blocks_1_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_2_hEgxyDlCngwrfFe
	cmpl	$4,%r11d
	jb	.L_AAD_blocks_3_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_4_hEgxyDlCngwrfFe
	cmpl	$6,%r11d
	jb	.L_AAD_blocks_5_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_6_hEgxyDlCngwrfFe
	cmpl	$8,%r11d
	jb	.L_AAD_blocks_7_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_8_hEgxyDlCngwrfFe
	cmpl	$10,%r11d
	jb	.L_AAD_blocks_9_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_10_hEgxyDlCngwrfFe
	cmpl	$12,%r11d
	jb	.L_AAD_blocks_11_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_12_hEgxyDlCngwrfFe
	cmpl	$14,%r11d
	jb	.L_AAD_blocks_13_hEgxyDlCngwrfFe
	je	.L_AAD_blocks_14_hEgxyDlCngwrfFe
	cmpl	$15,%r11d
	je	.L_AAD_blocks_15_hEgxyDlCngwrfFe
.L_AAD_blocks_16_hEgxyDlCngwrfFe:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	0(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	64(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vmovdqu64	128(%rsi),%zmm15
.byte	98,83,93,72,68,223,17
.byte	98,211,93,72,68,223,0
	vpternlogq	$0x96,%zmm9,%zmm11,%zmm1
	vpternlogq	$0x96,%zmm10,%zmm3,%zmm6
.byte	98,83,93,72,68,223,1
.byte	98,211,93,72,68,223,16
	vpternlogq	$0x96,%zmm12,%zmm11,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm3,%zmm8
	vmovdqu64	192(%rsi),%zmm15
.byte	98,83,85,72,68,207,17
.byte	98,83,85,72,68,215,0
.byte	98,83,85,72,68,231,1
.byte	98,83,85,72,68,239,16
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_15_hEgxyDlCngwrfFe:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	16(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	80(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vmovdqu64	144(%rsi),%zmm15
.byte	98,83,93,72,68,223,17
.byte	98,211,93,72,68,223,0
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
.byte	98,83,93,72,68,223,1
.byte	98,211,93,72,68,223,16
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	208(%rsi),%ymm15
	vinserti64x2	$2,240(%rsi),%zmm15,%zmm15
.byte	98,211,85,72,68,255,1
.byte	98,83,85,72,68,199,16
.byte	98,211,85,72,68,207,17
.byte	98,211,85,72,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_14_hEgxyDlCngwrfFe:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%ymm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%ymm16,%ymm5,%ymm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	32(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	96(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vmovdqu64	160(%rsi),%zmm15
.byte	98,83,93,72,68,223,17
.byte	98,211,93,72,68,223,0
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
.byte	98,83,93,72,68,223,1
.byte	98,211,93,72,68,223,16
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	224(%rsi),%ymm15
.byte	98,211,85,40,68,255,1
.byte	98,83,85,40,68,199,16
.byte	98,211,85,40,68,207,17
.byte	98,211,85,40,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_13_hEgxyDlCngwrfFe:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%xmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%xmm16,%xmm5,%xmm5
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	48(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	112(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vmovdqu64	176(%rsi),%zmm15
.byte	98,83,93,72,68,223,17
.byte	98,211,93,72,68,223,0
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
.byte	98,83,93,72,68,223,1
.byte	98,211,93,72,68,223,16
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	240(%rsi),%xmm15
.byte	98,211,85,8,68,255,1
.byte	98,83,85,8,68,199,16
.byte	98,211,85,8,68,207,17
.byte	98,211,85,8,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_12_hEgxyDlCngwrfFe:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	64(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	128(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vmovdqu64	192(%rsi),%zmm15
.byte	98,83,93,72,68,223,17
.byte	98,211,93,72,68,223,0
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
.byte	98,83,93,72,68,223,1
.byte	98,211,93,72,68,223,16
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_11_hEgxyDlCngwrfFe:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	80(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	144(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	208(%rsi),%ymm15
	vinserti64x2	$2,240(%rsi),%zmm15,%zmm15
.byte	98,211,93,72,68,255,1
.byte	98,83,93,72,68,199,16
.byte	98,211,93,72,68,207,17
.byte	98,211,93,72,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_10_hEgxyDlCngwrfFe:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%ymm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%ymm16,%ymm4,%ymm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	96(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	160(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	224(%rsi),%ymm15
.byte	98,211,93,40,68,255,1
.byte	98,83,93,40,68,199,16
.byte	98,211,93,40,68,207,17
.byte	98,211,93,40,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_9_hEgxyDlCngwrfFe:
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%xmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%xmm16,%xmm4,%xmm4
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	112(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	176(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vmovdqu64	240(%rsi),%xmm15
.byte	98,211,93,8,68,255,1
.byte	98,83,93,8,68,199,16
.byte	98,211,93,8,68,207,17
.byte	98,211,93,8,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_8_hEgxyDlCngwrfFe:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	128(%rsi),%zmm15
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
	vmovdqu64	192(%rsi),%zmm15
.byte	98,83,101,72,68,207,17
.byte	98,83,101,72,68,215,0
.byte	98,83,101,72,68,231,1
.byte	98,83,101,72,68,239,16
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_7_hEgxyDlCngwrfFe:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	144(%rsi),%zmm15
.byte	98,83,37,72,68,207,17
.byte	98,83,37,72,68,215,0
.byte	98,83,37,72,68,231,1
.byte	98,83,37,72,68,239,16
	vmovdqu64	208(%rsi),%ymm15
	vinserti64x2	$2,240(%rsi),%zmm15,%zmm15
.byte	98,211,101,72,68,255,1
.byte	98,83,101,72,68,199,16
.byte	98,211,101,72,68,207,17
.byte	98,211,101,72,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_6_hEgxyDlCngwrfFe:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%ymm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%ymm16,%ymm3,%ymm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	160(%rsi),%zmm15
.byte	98,83,37,72,68,207,17
.byte	98,83,37,72,68,215,0
.byte	98,83,37,72,68,231,1
.byte	98,83,37,72,68,239,16
	vmovdqu64	224(%rsi),%ymm15
.byte	98,211,101,40,68,255,1
.byte	98,83,101,40,68,199,16
.byte	98,211,101,40,68,207,17
.byte	98,211,101,40,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_5_hEgxyDlCngwrfFe:
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%xmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%xmm16,%xmm3,%xmm3
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	176(%rsi),%zmm15
.byte	98,83,37,72,68,207,17
.byte	98,83,37,72,68,215,0
.byte	98,83,37,72,68,231,1
.byte	98,83,37,72,68,239,16
	vmovdqu64	240(%rsi),%xmm15
.byte	98,211,101,8,68,255,1
.byte	98,83,101,8,68,199,16
.byte	98,211,101,8,68,207,17
.byte	98,211,101,8,68,247,0
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_4_hEgxyDlCngwrfFe:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	192(%rsi),%zmm15
.byte	98,83,37,72,68,207,17
.byte	98,83,37,72,68,215,0
.byte	98,83,37,72,68,231,1
.byte	98,83,37,72,68,239,16
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_3_hEgxyDlCngwrfFe:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	208(%rsi),%ymm15
	vinserti64x2	$2,240(%rsi),%zmm15,%zmm15
.byte	98,211,37,72,68,255,1
.byte	98,83,37,72,68,199,16
.byte	98,211,37,72,68,207,17
.byte	98,211,37,72,68,247,0
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_2_hEgxyDlCngwrfFe:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%ymm11{%k1}{z}
	vpshufb	%ymm16,%ymm11,%ymm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	224(%rsi),%ymm15
.byte	98,211,37,40,68,255,1
.byte	98,83,37,40,68,199,16
.byte	98,211,37,40,68,207,17
.byte	98,211,37,40,68,247,0
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
	jmp	.L_CALC_AAD_done_hEgxyDlCngwrfFe
.L_AAD_blocks_1_hEgxyDlCngwrfFe:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%xmm11{%k1}{z}
	vpshufb	%xmm16,%xmm11,%xmm11
	vpxorq	%zmm14,%zmm11,%zmm11
	vmovdqu64	240(%rsi),%xmm15
.byte	98,211,37,8,68,255,1
.byte	98,83,37,8,68,199,16
.byte	98,211,37,8,68,207,17
.byte	98,211,37,8,68,247,0
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15
.byte	98,243,5,8,68,254,1
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7
.byte	98,115,5,8,68,199,0
	vpsrldq	$4,%xmm8,%xmm8
.byte	98,115,5,8,68,247,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14
.L_CALC_AAD_done_hEgxyDlCngwrfFe:
	vpshufb	SHUF_MASK(%rip),%xmm14,%xmm14
	vmovdqu64	%xmm14,(%rdi)
	cmpq	$256,%rcx
	jbe	.Lskip_hkeys_cleanup_EmbgEptodyewbFa
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_EmbgEptodyewbFa:
	vzeroupper
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
.Lexit_ghash:
	.byte	0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc
.size	gcm_ghash_avx512, .-gcm_ghash_avx512
.globl	gcm_setiv_avx512
.hidden gcm_setiv_avx512
.hidden	gcm_setiv_avx512
.type	gcm_setiv_avx512,@function
.align	32
gcm_setiv_avx512:
.cfi_startproc
.Lsetiv_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lsetiv_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lsetiv_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lsetiv_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lsetiv_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lsetiv_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsetiv_seh_push_r15:
	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lsetiv_seh_setfp:
.Lsetiv_seh_prolog_end:
	subq	$820,%rsp
	andq	$(-64),%rsp
	cmpq	$12,%rcx
	je	iv_len_12_init_IV
	vpxor	%xmm2,%xmm2,%xmm2
	leaq	80(%rsi),%r13
	movq	%rdx,%r10
	movq	%rcx,%r11
	orq	%r11,%r11
	jz	.L_CALC_AAD_done_bnzFsuvmDknpsbp
	xorq	%rbx,%rbx
	vmovdqa64	SHUF_MASK(%rip),%zmm16
.L_get_AAD_loop48x16_bnzFsuvmDknpsbp:
	cmpq	$768,%r11
	jl	.L_exit_AAD_loop48x16_bnzFsuvmDknpsbp
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_dBmbyqhifbmbobw
	vmovdqu64	192(%r13),%zmm1
	vmovdqu64	%zmm1,704(%rsp)
	vmovdqu64	128(%r13),%zmm9
	vmovdqu64	%zmm9,640(%rsp)
	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9
	vmovdqu64	64(%r13),%zmm10
	vmovdqu64	%zmm10,576(%rsp)
	vmovdqu64	0(%r13),%zmm12
	vmovdqu64	%zmm12,512(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,448(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,384(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,320(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,256(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,192(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,128(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,64(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,0(%rsp)
.L_skip_hkeys_precomputation_dBmbyqhifbmbobw:
	movq	$1,%rbx
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	0(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	64(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	192(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	256(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	320(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	448(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	512(%r10),%zmm11
	vmovdqu64	576(%r10),%zmm3
	vmovdqu64	640(%r10),%zmm4
	vmovdqu64	704(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	576(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	704(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10
.byte	98,211,45,8,68,200,1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1
.byte	98,115,45,8,68,201,0
	vpsrldq	$4,%xmm9,%xmm9
.byte	98,243,45,8,68,209,16
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2
	subq	$768,%r11
	je	.L_CALC_AAD_done_bnzFsuvmDknpsbp
	addq	$768,%r10
	jmp	.L_get_AAD_loop48x16_bnzFsuvmDknpsbp
.L_exit_AAD_loop48x16_bnzFsuvmDknpsbp:
	cmpq	$512,%r11
	jl	.L_less_than_32x16_bnzFsuvmDknpsbp
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_javBbvtBBkicjdB
	vmovdqu64	192(%r13),%zmm1
	vmovdqu64	%zmm1,704(%rsp)
	vmovdqu64	128(%r13),%zmm9
	vmovdqu64	%zmm9,640(%rsp)
	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9
	vmovdqu64	64(%r13),%zmm10
	vmovdqu64	%zmm10,576(%rsp)
	vmovdqu64	0(%r13),%zmm12
	vmovdqu64	%zmm12,512(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,448(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,384(%rsp)
.byte	98,83,45,72,68,233,17
.byte	98,83,45,72,68,249,0
.byte	98,195,45,72,68,201,1
.byte	98,83,45,72,68,209,16
	vpxorq	%zmm17,%zmm10,%zmm10
	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,250,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10
.byte	98,83,117,64,68,250,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,210,16
	vpslldq	$4,%zmm10,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10
	vmovdqu64	%zmm10,320(%rsp)
.byte	98,83,29,72,68,233,17
.byte	98,83,29,72,68,249,0
.byte	98,195,29,72,68,201,1
.byte	98,83,29,72,68,225,16
	vpxorq	%zmm17,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12
	vmovdqu64	POLY2(%rip),%zmm17
.byte	98,83,117,64,68,252,1
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12
.byte	98,83,117,64,68,252,0
	vpsrldq	$4,%zmm15,%zmm15
.byte	98,83,117,64,68,228,16
	vpslldq	$4,%zmm12,%zmm12
	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12
	vmovdqu64	%zmm12,256(%rsp)
.L_skip_hkeys_precomputation_javBbvtBBkicjdB:
	movq	$1,%rbx
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	256(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	320(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	448(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	576(%rsp),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	704(%rsp),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
.byte	98,163,85,72,68,203,1
.byte	98,163,85,72,68,211,16
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10
.byte	98,211,45,8,68,200,1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1
.byte	98,115,45,8,68,201,0
	vpsrldq	$4,%xmm9,%xmm9
.byte	98,243,45,8,68,209,16
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2
	subq	$512,%r11
	je	.L_CALC_AAD_done_bnzFsuvmDknpsbp
	addq	$512,%r10
	jmp	.L_less_than_16x16_bnzFsuvmDknpsbp
.L_less_than_32x16_bnzFsuvmDknpsbp:
	cmpq	$256,%r11
	jl	.L_less_than_16x16_bnzFsuvmDknpsbp
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	0(%r13),%zmm19
.byte	98,179,37,72,68,203,17
.byte	98,51,37,72,68,203,0
.byte	98,51,37,72,68,211,1
.byte	98,51,37,72,68,227,16
	vmovdqu64	64(%r13),%zmm19
.byte	98,51,101,72,68,235,17
.byte	98,51,101,72,68,251,0
.byte	98,163,101,72,68,203,1
.byte	98,163,101,72,68,211,16
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%r13),%zmm19
.byte	98,179,93,72,68,203,17
.byte	98,51,93,72,68,203,0
.byte	98,51,93,72,68,211,1
.byte	98,51,93,72,68,227,16
	vmovdqu64	192(%r13),%zmm19
.byte	98,51,85,72,68,235,17
.byte	98,51,85,72,68,251,0
98,51,85,72,68,251,0 .byte 98,163,85,72,68,203,1 .byte 98,163,85,72,68,211,16 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 .byte 98,211,45,8,68,200,1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 .byte 98,115,45,8,68,201,0 vpsrldq $4,%xmm9,%xmm9 .byte 98,243,45,8,68,209,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $256,%r11 je .L_CALC_AAD_done_bnzFsuvmDknpsbp addq $256,%r10 .L_less_than_16x16_bnzFsuvmDknpsbp: leaq byte64_len_to_mask_table(%rip),%r12 leaq (%r12,%r11,8),%r12 addl $15,%r11d shrl $4,%r11d cmpl $2,%r11d jb .L_AAD_blocks_1_bnzFsuvmDknpsbp je .L_AAD_blocks_2_bnzFsuvmDknpsbp cmpl $4,%r11d jb .L_AAD_blocks_3_bnzFsuvmDknpsbp je .L_AAD_blocks_4_bnzFsuvmDknpsbp cmpl $6,%r11d jb .L_AAD_blocks_5_bnzFsuvmDknpsbp je .L_AAD_blocks_6_bnzFsuvmDknpsbp cmpl $8,%r11d jb .L_AAD_blocks_7_bnzFsuvmDknpsbp je .L_AAD_blocks_8_bnzFsuvmDknpsbp cmpl $10,%r11d jb .L_AAD_blocks_9_bnzFsuvmDknpsbp je .L_AAD_blocks_10_bnzFsuvmDknpsbp cmpl $12,%r11d jb .L_AAD_blocks_11_bnzFsuvmDknpsbp je .L_AAD_blocks_12_bnzFsuvmDknpsbp cmpl $14,%r11d jb .L_AAD_blocks_13_bnzFsuvmDknpsbp je .L_AAD_blocks_14_bnzFsuvmDknpsbp cmpl $15,%r11d je .L_AAD_blocks_15_bnzFsuvmDknpsbp .L_AAD_blocks_16_bnzFsuvmDknpsbp: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 0(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 64(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vmovdqu64 128(%r13),%zmm15 .byte 98,83,93,72,68,223,17 .byte 98,211,93,72,68,223,0 vpternlogq $0x96,%zmm9,%zmm11,%zmm1 vpternlogq $0x96,%zmm10,%zmm3,%zmm6 .byte 98,83,93,72,68,223,1 .byte 98,211,93,72,68,223,16 vpternlogq $0x96,%zmm12,%zmm11,%zmm7 vpternlogq $0x96,%zmm13,%zmm3,%zmm8 vmovdqu64 192(%r13),%zmm15 .byte 98,83,85,72,68,207,17 .byte 98,83,85,72,68,215,0 .byte 98,83,85,72,68,231,1 .byte 98,83,85,72,68,239,16 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_15_bnzFsuvmDknpsbp: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb 
%zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 16(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 80(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vmovdqu64 144(%r13),%zmm15 .byte 98,83,93,72,68,223,17 .byte 98,211,93,72,68,223,0 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 .byte 98,83,93,72,68,223,1 .byte 98,211,93,72,68,223,16 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 208(%r13),%ymm15 vinserti64x2 $2,240(%r13),%zmm15,%zmm15 .byte 98,211,85,72,68,255,1 .byte 98,83,85,72,68,199,16 .byte 98,211,85,72,68,207,17 .byte 98,211,85,72,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_14_bnzFsuvmDknpsbp: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%ymm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %ymm16,%ymm5,%ymm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 32(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 96(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vmovdqu64 160(%r13),%zmm15 .byte 98,83,93,72,68,223,17 .byte 98,211,93,72,68,223,0 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 .byte 98,83,93,72,68,223,1 .byte 98,211,93,72,68,223,16 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 224(%r13),%ymm15 .byte 98,211,85,40,68,255,1 .byte 98,83,85,40,68,199,16 .byte 98,211,85,40,68,207,17 .byte 98,211,85,40,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_13_bnzFsuvmDknpsbp: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%xmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb 
%zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %xmm16,%xmm5,%xmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 48(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 112(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vmovdqu64 176(%r13),%zmm15 .byte 98,83,93,72,68,223,17 .byte 98,211,93,72,68,223,0 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 .byte 98,83,93,72,68,223,1 .byte 98,211,93,72,68,223,16 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 240(%r13),%xmm15 .byte 98,211,85,8,68,255,1 .byte 98,83,85,8,68,199,16 .byte 98,211,85,8,68,207,17 .byte 98,211,85,8,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_12_bnzFsuvmDknpsbp: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 64(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 128(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vmovdqu64 192(%r13),%zmm15 .byte 98,83,93,72,68,223,17 .byte 98,211,93,72,68,223,0 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 .byte 98,83,93,72,68,223,1 .byte 98,211,93,72,68,223,16 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_11_bnzFsuvmDknpsbp: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 80(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 144(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vpxorq %zmm9,%zmm1,%zmm9 vpxorq 
%zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 208(%r13),%ymm15 vinserti64x2 $2,240(%r13),%zmm15,%zmm15 .byte 98,211,93,72,68,255,1 .byte 98,83,93,72,68,199,16 .byte 98,211,93,72,68,207,17 .byte 98,211,93,72,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_10_bnzFsuvmDknpsbp: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%ymm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %ymm16,%ymm4,%ymm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 96(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 160(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 224(%r13),%ymm15 .byte 98,211,93,40,68,255,1 .byte 98,83,93,40,68,199,16 .byte 98,211,93,40,68,207,17 .byte 98,211,93,40,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_9_bnzFsuvmDknpsbp: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%xmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %xmm16,%xmm4,%xmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 112(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 176(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 240(%r13),%xmm15 .byte 98,211,93,8,68,255,1 .byte 98,83,93,8,68,199,16 .byte 98,211,93,8,68,207,17 .byte 98,211,93,8,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 
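/*
 * Annotation added in editing: the idiom recurring in every
 * .L_AAD_blocks_N tail - XOR of the middle products, vpsrldq/vpslldq by 8
 * to split the 256-bit carryless product, vextracti64x4/vextracti32x4 plus
 * vpxorq to fold four lanes down to one 128-bit value, then three
 * vpclmulqdq operations against POLY2 combined with vpternlogq $0x96 (a
 * three-way XOR) - is the standard Montgomery-style reduction of the GHASH
 * product back into the single xmm accumulator (%xmm2 in these paths).
 */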
vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_8_bnzFsuvmDknpsbp: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 128(%r13),%zmm15 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 vmovdqu64 192(%r13),%zmm15 .byte 98,83,101,72,68,207,17 .byte 98,83,101,72,68,215,0 .byte 98,83,101,72,68,231,1 .byte 98,83,101,72,68,239,16 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_7_bnzFsuvmDknpsbp: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 144(%r13),%zmm15 .byte 98,83,37,72,68,207,17 .byte 98,83,37,72,68,215,0 .byte 98,83,37,72,68,231,1 .byte 98,83,37,72,68,239,16 vmovdqu64 208(%r13),%ymm15 vinserti64x2 $2,240(%r13),%zmm15,%zmm15 .byte 98,211,101,72,68,255,1 .byte 98,83,101,72,68,199,16 .byte 98,211,101,72,68,207,17 .byte 98,211,101,72,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_6_bnzFsuvmDknpsbp: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%ymm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %ymm16,%ymm3,%ymm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 160(%r13),%zmm15 .byte 98,83,37,72,68,207,17 .byte 98,83,37,72,68,215,0 .byte 98,83,37,72,68,231,1 .byte 98,83,37,72,68,239,16 vmovdqu64 224(%r13),%ymm15 .byte 98,211,101,40,68,255,1 .byte 98,83,101,40,68,199,16 .byte 98,211,101,40,68,207,17 .byte 98,211,101,40,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq 
$8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_5_bnzFsuvmDknpsbp: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%xmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %xmm16,%xmm3,%xmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 176(%r13),%zmm15 .byte 98,83,37,72,68,207,17 .byte 98,83,37,72,68,215,0 .byte 98,83,37,72,68,231,1 .byte 98,83,37,72,68,239,16 vmovdqu64 240(%r13),%xmm15 .byte 98,211,101,8,68,255,1 .byte 98,83,101,8,68,199,16 .byte 98,211,101,8,68,207,17 .byte 98,211,101,8,68,247,0 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_4_bnzFsuvmDknpsbp: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 192(%r13),%zmm15 .byte 98,83,37,72,68,207,17 .byte 98,83,37,72,68,215,0 .byte 98,83,37,72,68,231,1 .byte 98,83,37,72,68,239,16 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_3_bnzFsuvmDknpsbp: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 208(%r13),%ymm15 vinserti64x2 $2,240(%r13),%zmm15,%zmm15 .byte 98,211,37,72,68,255,1 .byte 98,83,37,72,68,199,16 .byte 98,211,37,72,68,207,17 .byte 98,211,37,72,68,247,0 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq 
$0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_2_bnzFsuvmDknpsbp: kmovq (%r12),%k1 vmovdqu8 0(%r10),%ymm11{%k1}{z} vpshufb %ymm16,%ymm11,%ymm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 224(%r13),%ymm15 .byte 98,211,37,40,68,255,1 .byte 98,83,37,40,68,199,16 .byte 98,211,37,40,68,207,17 .byte 98,211,37,40,68,247,0 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_bnzFsuvmDknpsbp .L_AAD_blocks_1_bnzFsuvmDknpsbp: kmovq (%r12),%k1 vmovdqu8 0(%r10),%xmm11{%k1}{z} vpshufb %xmm16,%xmm11,%xmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 240(%r13),%xmm15 .byte 98,211,37,8,68,255,1 .byte 98,83,37,8,68,199,16 .byte 98,211,37,8,68,207,17 .byte 98,211,37,8,68,247,0 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 .byte 98,243,5,8,68,254,1 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 .byte 98,115,5,8,68,199,0 vpsrldq $4,%xmm8,%xmm8 .byte 98,243,5,8,68,215,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 .L_CALC_AAD_done_bnzFsuvmDknpsbp: movq %rcx,%r10 shlq $3,%r10 vmovq %r10,%xmm3 vpxorq %xmm2,%xmm3,%xmm2 vmovdqu64 240(%r13),%xmm1 .byte 98,115,109,8,68,217,17 .byte 98,243,109,8,68,217,0 .byte 98,243,109,8,68,225,1 .byte 98,243,109,8,68,209,16 vpxorq %xmm4,%xmm2,%xmm2 vpsrldq $8,%xmm2,%xmm4 vpslldq $8,%xmm2,%xmm2 vpxorq %xmm4,%xmm11,%xmm11 vpxorq %xmm3,%xmm2,%xmm2 vmovdqu64 POLY2(%rip),%xmm4 .byte 98,243,93,8,68,218,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm2,%xmm2 .byte 98,243,93,8,68,218,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,243,93,8,68,210,16 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm3,%xmm11,%xmm2 vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 jmp skip_iv_len_12_init_IV iv_len_12_init_IV: vmovdqu8 ONEf(%rip),%xmm2 movq %rdx,%r11 movl $0x0000000000000fff,%r10d kmovq %r10,%k1 vmovdqu8 (%r11),%xmm2{%k1} skip_iv_len_12_init_IV: vmovdqu %xmm2,%xmm1 movl 240(%rdi),%r10d cmpl $9,%r10d je .Laes_128_otBvnbdyuroewzD cmpl $11,%r10d je .Laes_192_otBvnbdyuroewzD cmpl $13,%r10d je .Laes_256_otBvnbdyuroewzD jmp .Lexit_aes_otBvnbdyuroewzD .align 32 .Laes_128_otBvnbdyuroewzD: vpxorq 0(%rdi),%xmm1,%xmm1 .byte 98,242,117,8,220,79,1 .byte 98,242,117,8,220,79,2 .byte 98,242,117,8,220,79,3 .byte 98,242,117,8,220,79,4 .byte 98,242,117,8,220,79,5 .byte 98,242,117,8,220,79,6 .byte 98,242,117,8,220,79,7 .byte 98,242,117,8,220,79,8 .byte 98,242,117,8,220,79,9 .byte 98,242,117,8,221,79,10 jmp .Lexit_aes_otBvnbdyuroewzD .align 32 .Laes_192_otBvnbdyuroewzD: vpxorq 0(%rdi),%xmm1,%xmm1 .byte 98,242,117,8,220,79,1 .byte 98,242,117,8,220,79,2 .byte 98,242,117,8,220,79,3 .byte 98,242,117,8,220,79,4 .byte 98,242,117,8,220,79,5 .byte 98,242,117,8,220,79,6 .byte 98,242,117,8,220,79,7 .byte 98,242,117,8,220,79,8 .byte 98,242,117,8,220,79,9 .byte 
98,242,117,8,220,79,10 .byte 98,242,117,8,220,79,11 .byte 98,242,117,8,221,79,12 jmp .Lexit_aes_otBvnbdyuroewzD .align 32 .Laes_256_otBvnbdyuroewzD: vpxorq 0(%rdi),%xmm1,%xmm1 .byte 98,242,117,8,220,79,1 .byte 98,242,117,8,220,79,2 .byte 98,242,117,8,220,79,3 .byte 98,242,117,8,220,79,4 .byte 98,242,117,8,220,79,5 .byte 98,242,117,8,220,79,6 .byte 98,242,117,8,220,79,7 .byte 98,242,117,8,220,79,8 .byte 98,242,117,8,220,79,9 .byte 98,242,117,8,220,79,10 .byte 98,242,117,8,220,79,11 .byte 98,242,117,8,220,79,12 .byte 98,242,117,8,220,79,13 .byte 98,242,117,8,221,79,14 jmp .Lexit_aes_otBvnbdyuroewzD .Lexit_aes_otBvnbdyuroewzD: vmovdqu %xmm1,32(%rsi) vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 vmovdqu %xmm2,0(%rsi) .Lexit_setiv: cmpq $256,%rcx jbe .Lskip_hkeys_cleanup_lDGzdqCkvgheosr vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_lDGzdqCkvgheosr: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .byte 0xf3,0xc3 .Lsetiv_seh_end: .cfi_endproc .size gcm_setiv_avx512, .-gcm_setiv_avx512 .globl aes_gcm_encrypt_avx512 .hidden aes_gcm_encrypt_avx512 .hidden aes_gcm_encrypt_avx512 .type aes_gcm_encrypt_avx512,@function .align 32 aes_gcm_encrypt_avx512: .cfi_startproc .Lencrypt_seh_begin: #ifdef BORINGSSL_DISPATCH_TEST .extern BORINGSSL_function_hit .hidden BORINGSSL_function_hit movb $1,BORINGSSL_function_hit+7(%rip) #endif .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lencrypt_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lencrypt_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lencrypt_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lencrypt_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lencrypt_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Lencrypt_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lencrypt_seh_setfp: .Lencrypt_seh_prolog_end: subq $1588,%rsp andq $(-64),%rsp movl 240(%rdi),%eax cmpl $9,%eax je .Laes_gcm_encrypt_128_avx512 cmpl $11,%eax je .Laes_gcm_encrypt_192_avx512 cmpl $13,%eax je .Laes_gcm_encrypt_256_avx512 xorl %eax,%eax jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_128_avx512: orq %r8,%r8 je .L_enc_dec_abort_pzwgkGgbplFqzaB xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 movl (%rdx),%eax orq %rax,%rax je .L_partial_block_done_FkezCgctzlCoEyh movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 leaq 80(%rsi),%r10 vmovdqu64 240(%r10),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %rax,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%rax,1),%r13 subq $16,%r13 jge .L_no_extra_mask_FkezCgctzlCoEyh subq %r13,%r12 .L_no_extra_mask_FkezCgctzlCoEyh: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb 
SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_FkezCgctzlCoEyh .byte 98,243,13,8,68,252,17 .byte 98,115,13,8,68,212,0 .byte 98,115,13,8,68,220,1 .byte 98,115,13,8,68,244,16 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 .byte 98,83,37,8,68,214,1 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 .byte 98,83,37,8,68,214,0 vpsrldq $4,%xmm10,%xmm10 .byte 98,83,37,8,68,246,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movl $0,(%rdx) movq %rax,%r12 movq $16,%rax subq %r12,%rax jmp .L_enc_dec_done_FkezCgctzlCoEyh .L_partial_incomplete_FkezCgctzlCoEyh: addl %r8d,(%rdx) movq %r8,%rax .L_enc_dec_done_FkezCgctzlCoEyh: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%rax,2),%k1 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_FkezCgctzlCoEyh: vmovdqu64 0(%rsi),%xmm2 subq %rax,%r8 je .L_enc_dec_done_pzwgkGgbplFqzaB cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_pzwgkGgbplFqzaB vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_yByFrylbFDFnFCp vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_yByFrylbFDFnFCp .L_next_16_overflow_yByFrylbFDFnFCp: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_yByFrylbFDFnFCp: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%rax,1),%zmm0 vmovdqu8 64(%rcx,%rax,1),%zmm3 vmovdqu8 128(%rcx,%rax,1),%zmm4 vmovdqu8 192(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 
98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%rax,1) vmovdqu8 %zmm10,64(%r10,%rax,1) vmovdqu8 %zmm11,128(%r10,%rax,1) vmovdqu8 %zmm12,192(%r10,%rax,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_achfkmnqFwjgbDD vmovdqu64 192(%r12),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 128(%r12),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 64(%r12),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 0(%r12),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_achfkmnqFwjgbDD: cmpq $512,%r8 jb .L_message_below_32_blocks_pzwgkGgbplFqzaB cmpb $240,%r15b jae .L_next_16_overflow_xvcFynjeulFjDdF vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_xvcFynjeulFjDdF .L_next_16_overflow_xvcFynjeulFjDdF: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_xvcFynjeulFjDdF: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%rax,1),%zmm0 vmovdqu8 320(%rcx,%rax,1),%zmm3 vmovdqu8 384(%rcx,%rax,1),%zmm4 vmovdqu8 448(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%rax,1) vmovdqu8 %zmm10,320(%r10,%rax,1) vmovdqu8 %zmm11,384(%r10,%rax,1) vmovdqu8 %zmm12,448(%r10,%rax,1) vpshufb %zmm29,%zmm7,%zmm7 
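/*
 * Annotation added in editing: the counter blocks are kept in big-endian
 * (reflected) form; %r15b shadows the low counter byte, and the
 * cmpb $240,%r15b tests take the fast path (adding the big-endian
 * ddq_addbe_* constants directly) only when four 4-block increments cannot
 * carry out of that byte - otherwise the counters are byte-swapped, bumped
 * with ddq_add_*, and swapped back. The ciphertext just written out is
 * byte-reflected with SHUF_MASK (%zmm29) and parked in fixed stack slots,
 * apparently deferring its GHASH folding to a later iteration so it can
 * overlap with the AES rounds of the following 16 blocks.
 */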
vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_mBcrmCyGfEttetw vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq 
%zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_mBcrmCyGfEttetw: movq $1,%r14 addq $512,%rax subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_pzwgkGgbplFqzaB .L_encrypt_big_nblocks_pzwgkGgbplFqzaB: cmpb $240,%r15b jae .L_16_blocks_overflow_avoAfAGuxmumDjA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_avoAfAGuxmumDjA .L_16_blocks_overflow_avoAfAGuxmumDjA: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_avoAfAGuxmumDjA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_AGgjmjawDklDqyq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_AGgjmjawDklDqyq .L_16_blocks_overflow_AGgjmjawDklDqyq: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_AGgjmjawDklDqyq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 
112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_gutvpupplrsoEbw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_gutvpupplrsoEbw .L_16_blocks_overflow_gutvpupplrsoEbw: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_gutvpupplrsoEbw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%rax,1),%zmm17 vmovdqu8 576(%rcx,%rax,1),%zmm19 vmovdqu8 
640(%rcx,%rax,1),%zmm20 vmovdqu8 704(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%rax,1) vmovdqu8 %zmm3,576(%r10,%rax,1) vmovdqu8 %zmm4,640(%r10,%rax,1) vmovdqu8 %zmm5,704(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%rax subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_pzwgkGgbplFqzaB .L_no_more_big_nblocks_pzwgkGgbplFqzaB: cmpq $512,%r8 jae .L_encrypt_32_blocks_pzwgkGgbplFqzaB cmpq $256,%r8 jae .L_encrypt_16_blocks_pzwgkGgbplFqzaB .L_encrypt_0_blocks_ghash_32_pzwgkGgbplFqzaB: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl 
$4,%r10d
je .L_last_num_blocks_is_0_BdcphecxdpdFEsb
cmpl $8,%r10d
je .L_last_num_blocks_is_8_BdcphecxdpdFEsb
jb .L_last_num_blocks_is_7_1_BdcphecxdpdFEsb
cmpl $12,%r10d
je .L_last_num_blocks_is_12_BdcphecxdpdFEsb
jb .L_last_num_blocks_is_11_9_BdcphecxdpdFEsb
cmpl $15,%r10d
je .L_last_num_blocks_is_15_BdcphecxdpdFEsb
ja .L_last_num_blocks_is_16_BdcphecxdpdFEsb
cmpl $14,%r10d
je .L_last_num_blocks_is_14_BdcphecxdpdFEsb
jmp .L_last_num_blocks_is_13_BdcphecxdpdFEsb
.L_last_num_blocks_is_11_9_BdcphecxdpdFEsb:
cmpl $10,%r10d
je .L_last_num_blocks_is_10_BdcphecxdpdFEsb
ja .L_last_num_blocks_is_11_BdcphecxdpdFEsb
jmp .L_last_num_blocks_is_9_BdcphecxdpdFEsb
.L_last_num_blocks_is_7_1_BdcphecxdpdFEsb:
cmpl $4,%r10d
je .L_last_num_blocks_is_4_BdcphecxdpdFEsb
jb .L_last_num_blocks_is_3_1_BdcphecxdpdFEsb
cmpl $6,%r10d
ja .L_last_num_blocks_is_7_BdcphecxdpdFEsb
je .L_last_num_blocks_is_6_BdcphecxdpdFEsb
jmp .L_last_num_blocks_is_5_BdcphecxdpdFEsb
.L_last_num_blocks_is_3_1_BdcphecxdpdFEsb:
cmpl $2,%r10d
ja .L_last_num_blocks_is_3_BdcphecxdpdFEsb
je .L_last_num_blocks_is_2_BdcphecxdpdFEsb
.L_last_num_blocks_is_1_BdcphecxdpdFEsb:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_azzgqhumkfnyDqm
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_azzgqhumkfnyDqm
.L_16_blocks_overflow_azzgqhumkfnyDqm:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_azzgqhumkfnyDqm:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %xmm29,%xmm0,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_dnmqhGDjDpgnine
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_dnmqhGDjDpgnine
.L_small_initial_partial_block_dnmqhGDjDpgnine:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm0
.byte 98,147,125,8,68,217,1
vpslldq $8,%xmm3,%xmm3
vpxorq %xmm3,%xmm25,%xmm3
.byte 98,243,125,8,68,227,0
vpsrldq $4,%xmm4,%xmm4
.byte 98,115,125,8,68,243,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm4,%xmm14
vpxorq %xmm7,%xmm14,%xmm14
jmp .L_after_reduction_dnmqhGDjDpgnine
.L_small_initial_compute_done_dnmqhGDjDpgnine:
.L_after_reduction_dnmqhGDjDpgnine:
jmp .L_last_blocks_done_BdcphecxdpdFEsb
.L_last_num_blocks_is_2_BdcphecxdpdFEsb:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $254,%r15d
jae .L_16_blocks_overflow_yekhBCebufcAiFh
vpaddd %ymm28,%ymm2,%ymm0
jmp .L_16_blocks_ok_yekhBCebufcAiFh
.L_16_blocks_overflow_yekhBCebufcAiFh:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %ymm29,%ymm0,%ymm0
.L_16_blocks_ok_yekhBCebufcAiFh:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %ymm30,%ymm0,%ymm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,40,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,40,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,40,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,40,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte 98,146,125,40,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,40,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,40,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,40,220,199
.byte 98,146,125,40,221,198
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %ymm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %ymm29,%ymm0,%ymm17
vextracti32x4 $1,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (2 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_jwyvkjdvesmxGpv
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_jwyvkjdvesmxGpv
.L_small_initial_partial_block_jwyvkjdvesmxGpv:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_jwyvkjdvesmxGpv:
orq %r8,%r8
je .L_after_reduction_jwyvkjdvesmxGpv
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_jwyvkjdvesmxGpv:
jmp .L_last_blocks_done_BdcphecxdpdFEsb
.L_last_num_blocks_is_3_BdcphecxdpdFEsb:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $253,%r15d
jae .L_16_blocks_overflow_usjywjwllaabozc
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_usjywjwllaabozc
.L_16_blocks_overflow_usjywjwllaabozc:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_usjywjwllaabozc:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $2,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vextracti32x4 $2,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (3 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_lmkDAitgFzCCoEA
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_lmkDAitgFzCCoEA
.L_small_initial_partial_block_lmkDAitgFzCCoEA:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_lmkDAitgFzCCoEA:
orq %r8,%r8
je .L_after_reduction_lmkDAitgFzCCoEA
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_lmkDAitgFzCCoEA:
jmp .L_last_blocks_done_BdcphecxdpdFEsb
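// The residual-block handlers below (4 through 16 blocks) repeat the pattern
// of the 1/2/3-block paths above: derive the counter blocks, run the AES
// rounds interleaved with the deferred GHASH multiplies, XOR the keystream
// into the masked tail, then fold and reduce the GHASH accumulator.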
.L_last_num_blocks_is_4_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_xobkzaAwcplaFgb vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_xobkzaAwcplaFgb .L_16_blocks_overflow_xobkzaAwcplaFgb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_xobkzaAwcplaFgb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_genGClghdbzBqhw subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_genGClghdbzBqhw .L_small_initial_partial_block_genGClghdbzBqhw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 
98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_genGClghdbzBqhw: orq %r8,%r8 je .L_after_reduction_genGClghdbzBqhw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_genGClghdbzBqhw: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_5_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_bpsqdGAhjeggABn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_bpsqdGAhjeggABn .L_16_blocks_overflow_bpsqdGAhjeggABn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_bpsqdGAhjeggABn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 
98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wdqrtGpojajFBea subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wdqrtGpojajFBea .L_small_initial_partial_block_wdqrtGpojajFBea: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wdqrtGpojajFBea: orq %r8,%r8 je .L_after_reduction_wdqrtGpojajFBea vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wdqrtGpojajFBea: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_6_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_qmgDCpkysmqcgnB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_qmgDCpkysmqcgnB .L_16_blocks_overflow_qmgDCpkysmqcgnB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_qmgDCpkysmqcgnB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 
98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GvjnkpjsgDafsun subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GvjnkpjsgDafsun .L_small_initial_partial_block_GvjnkpjsgDafsun: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 
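// vpternlogq with immediate 0x96 (the truth table of A ^ B ^ C) folds three
// carry-less-multiply partial products in a single three-way XOR.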
vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GvjnkpjsgDafsun: orq %r8,%r8 je .L_after_reduction_GvjnkpjsgDafsun vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GvjnkpjsgDafsun: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_7_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_jaFyvjvpAfzmwyg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_jaFyvjvpAfzmwyg .L_16_blocks_overflow_jaFyvjvpAfzmwyg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_jaFyvjvpAfzmwyg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 
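// CTR keystream XORed with the source data; the {%k1}-masked store below
// writes only the valid bytes of the final partial 64-byte chunk.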
vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iuyAGoBcDewEeiy subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iuyAGoBcDewEeiy .L_small_initial_partial_block_iuyAGoBcDewEeiy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iuyAGoBcDewEeiy: orq %r8,%r8 je .L_after_reduction_iuyAGoBcDewEeiy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iuyAGoBcDewEeiy: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_8_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_FbwsrgpDGDmccid vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_FbwsrgpDGDmccid .L_16_blocks_overflow_FbwsrgpDGDmccid: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_FbwsrgpDGDmccid: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 
1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lABtdkpvoGeFpzp subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lABtdkpvoGeFpzp .L_small_initial_partial_block_lABtdkpvoGeFpzp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 
98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lABtdkpvoGeFpzp: orq %r8,%r8 je .L_after_reduction_lABtdkpvoGeFpzp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lABtdkpvoGeFpzp: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_9_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_dtxuExFwmpsGEiG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_dtxuExFwmpsGEiG .L_16_blocks_overflow_dtxuExFwmpsGEiG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_dtxuExFwmpsGEiG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 
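// The .byte runs here and throughout appear to be hand-encoded EVEX
// instructions (opcode 0x44 = vpclmulqdq, 0xdc/0xdd = vaesenc/vaesenclast),
// emitted as raw bytes, presumably for assemblers without VPCLMULQDQ/VAES
// syntax support.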
.byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vkADoeFsfDwilnv subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vkADoeFsfDwilnv .L_small_initial_partial_block_vkADoeFsfDwilnv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vkADoeFsfDwilnv: orq %r8,%r8 je .L_after_reduction_vkADoeFsfDwilnv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vkADoeFsfDwilnv: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_10_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_damgrhyFxffganz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_damgrhyFxffganz .L_16_blocks_overflow_damgrhyFxffganz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_damgrhyFxffganz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 
%zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iankhgrgFnoiAgG subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iankhgrgFnoiAgG .L_small_initial_partial_block_iankhgrgFnoiAgG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iankhgrgFnoiAgG: orq %r8,%r8 je .L_after_reduction_iankhgrgFnoiAgG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iankhgrgFnoiAgG: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_11_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_gnGEkpgDpmugvpk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_gnGEkpgDpmugvpk .L_16_blocks_overflow_gnGEkpgDpmugvpk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_gnGEkpgDpmugvpk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sECkucceDhaBnCk subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 
$2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sECkucceDhaBnCk .L_small_initial_partial_block_sECkucceDhaBnCk: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sECkucceDhaBnCk: orq %r8,%r8 je .L_after_reduction_sECkucceDhaBnCk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sECkucceDhaBnCk: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_12_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_qkecuzhoaAuxmmC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_qkecuzhoaAuxmmC .L_16_blocks_overflow_qkecuzhoaAuxmmC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_qkecuzhoaAuxmmC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 
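// The cmpl $244 / jae pair above is the counter-overflow guard: with N
// residual blocks the low counter byte is compared against 256 - N (244 for
// N = 12); if adding N would carry out of that byte, the overflow path
// byte-reflects the counter, performs the add, and reflects back.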
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GEFnxzpzjbtbhxx subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GEFnxzpzjbtbhxx .L_small_initial_partial_block_GEFnxzpzjbtbhxx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GEFnxzpzjbtbhxx: orq %r8,%r8 je .L_after_reduction_GEFnxzpzjbtbhxx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GEFnxzpzjbtbhxx: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_13_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_BjhkFcriuCnuFez vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_BjhkFcriuCnuFez .L_16_blocks_overflow_BjhkFcriuCnuFez: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_BjhkFcriuCnuFez: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 
.byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jyxtluvpAmFhjFk subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
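// Tail of the GHASH reduction for this handler: the folded high/low
// product halves in %xmm0/%xmm3 are reduced modulo the GHASH polynomial
// using the POLY2 constant loaded into %xmm1 (two vpclmulqdq steps plus
// byte shifts), with the result folded into %xmm14, which appears to hold
// the running GHASH state across these handlers.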
.byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jyxtluvpAmFhjFk .L_small_initial_partial_block_jyxtluvpAmFhjFk: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jyxtluvpAmFhjFk: orq %r8,%r8 je .L_after_reduction_jyxtluvpAmFhjFk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jyxtluvpAmFhjFk: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_14_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_kGBwgppdvolmGmc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_kGBwgppdvolmGmc .L_16_blocks_overflow_kGBwgppdvolmGmc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_kGBwgppdvolmGmc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AxanimCshomfwbg subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 
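// Each .L_last_num_blocks_is_N handler has two exits: the full-final-block
// path zeroes the residual-length word at (%rdx), while the
// .L_small_initial_partial_block_* path records the leftover byte count at
// (%rdx) and stashes what appears to be the last ciphertext block (%xmm11)
// at 16(%rsi) so the partial block can be completed on a later call.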
.byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AxanimCshomfwbg .L_small_initial_partial_block_AxanimCshomfwbg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AxanimCshomfwbg: orq %r8,%r8 je .L_after_reduction_AxanimCshomfwbg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AxanimCshomfwbg: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_15_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_EBkkfjcEDyEptfo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_EBkkfjcEDyEptfo .L_16_blocks_overflow_EBkkfjcEDyEptfo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_EBkkfjcEDyEptfo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 
64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_henbgxejEhFgymC subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_henbgxejEhFgymC .L_small_initial_partial_block_henbgxejEhFgymC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_henbgxejEhFgymC: orq %r8,%r8 je .L_after_reduction_henbgxejEhFgymC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_henbgxejEhFgymC: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_16_BdcphecxdpdFEsb: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_BlcvjlyDGzsAttk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_BlcvjlyDGzsAttk .L_16_blocks_overflow_BlcvjlyDGzsAttk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_BlcvjlyDGzsAttk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 
192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_aGAffhBljtiFsea: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 
$1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aGAffhBljtiFsea: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aGAffhBljtiFsea: jmp .L_last_blocks_done_BdcphecxdpdFEsb .L_last_num_blocks_is_0_BdcphecxdpdFEsb: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_BdcphecxdpdFEsb: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pzwgkGgbplFqzaB .L_encrypt_32_blocks_pzwgkGgbplFqzaB: cmpb $240,%r15b jae .L_16_blocks_overflow_zuczDhwqwDAmzjf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_zuczDhwqwDAmzjf .L_16_blocks_overflow_zuczDhwqwDAmzjf: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_zuczDhwqwDAmzjf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 
48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_CeGBtrGsogoqpyb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_CeGBtrGsogoqpyb .L_16_blocks_overflow_CeGBtrGsogoqpyb: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_CeGBtrGsogoqpyb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 
1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 
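// vpternlogq with immediate 0x96 computes a three-way XOR (0x96 is the
// parity truth table), so each instruction here folds two 512-bit
// carry-less partial products into the %zmm24/%zmm25/%zmm26 accumulators
// in place of a pair of vpxorq's.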
vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%rax movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_CfrpfvcjvvrcbGa cmpl $8,%r10d je .L_last_num_blocks_is_8_CfrpfvcjvvrcbGa jb .L_last_num_blocks_is_7_1_CfrpfvcjvvrcbGa cmpl $12,%r10d je .L_last_num_blocks_is_12_CfrpfvcjvvrcbGa jb .L_last_num_blocks_is_11_9_CfrpfvcjvvrcbGa cmpl $15,%r10d je .L_last_num_blocks_is_15_CfrpfvcjvvrcbGa ja .L_last_num_blocks_is_16_CfrpfvcjvvrcbGa cmpl $14,%r10d je .L_last_num_blocks_is_14_CfrpfvcjvvrcbGa jmp .L_last_num_blocks_is_13_CfrpfvcjvvrcbGa .L_last_num_blocks_is_11_9_CfrpfvcjvvrcbGa: cmpl $10,%r10d je .L_last_num_blocks_is_10_CfrpfvcjvvrcbGa ja .L_last_num_blocks_is_11_CfrpfvcjvvrcbGa jmp .L_last_num_blocks_is_9_CfrpfvcjvvrcbGa .L_last_num_blocks_is_7_1_CfrpfvcjvvrcbGa: cmpl $4,%r10d je .L_last_num_blocks_is_4_CfrpfvcjvvrcbGa jb .L_last_num_blocks_is_3_1_CfrpfvcjvvrcbGa cmpl $6,%r10d ja .L_last_num_blocks_is_7_CfrpfvcjvvrcbGa je .L_last_num_blocks_is_6_CfrpfvcjvvrcbGa jmp .L_last_num_blocks_is_5_CfrpfvcjvvrcbGa .L_last_num_blocks_is_3_1_CfrpfvcjvvrcbGa: cmpl $2,%r10d ja .L_last_num_blocks_is_3_CfrpfvcjvvrcbGa je .L_last_num_blocks_is_2_CfrpfvcjvvrcbGa .L_last_num_blocks_is_1_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_dbajrbEcjsFpceD vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_dbajrbEcjsFpceD .L_16_blocks_overflow_dbajrbEcjsFpceD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_dbajrbEcjsFpceD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 
98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_qFrfFusofbDaigi subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qFrfFusofbDaigi .L_small_initial_partial_block_qFrfFusofbDaigi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_qFrfFusofbDaigi .L_small_initial_compute_done_qFrfFusofbDaigi: .L_after_reduction_qFrfFusofbDaigi: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_2_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_kgpAeeaoAnozgEF vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_kgpAeeaoAnozgEF .L_16_blocks_overflow_kgpAeeaoAnozgEF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_kgpAeeaoAnozgEF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 
960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zaugFxnkqnldtoD subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zaugFxnkqnldtoD .L_small_initial_partial_block_zaugFxnkqnldtoD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zaugFxnkqnldtoD: orq %r8,%r8 je .L_after_reduction_zaugFxnkqnldtoD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zaugFxnkqnldtoD: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_3_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_kblsDeoCDCisntD vpaddd %zmm28,%zmm2,%zmm0 jmp 
.L_16_blocks_ok_kblsDeoCDCisntD .L_16_blocks_overflow_kblsDeoCDCisntD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_kblsDeoCDCisntD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Ajrbbfyxhsbqszm subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Ajrbbfyxhsbqszm .L_small_initial_partial_block_Ajrbbfyxhsbqszm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Ajrbbfyxhsbqszm: orq %r8,%r8 je .L_after_reduction_Ajrbbfyxhsbqszm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Ajrbbfyxhsbqszm: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_4_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_eGcBplCnDqdtGiy vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_eGcBplCnDqdtGiy .L_16_blocks_overflow_eGcBplCnDqdtGiy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_eGcBplCnDqdtGiy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xeEmmeAmgryyzGr subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xeEmmeAmgryyzGr .L_small_initial_partial_block_xeEmmeAmgryyzGr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xeEmmeAmgryyzGr: orq %r8,%r8 je .L_after_reduction_xeEmmeAmgryyzGr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xeEmmeAmgryyzGr: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_5_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_bgsqDFmekFAimag vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_bgsqDFmekFAimag .L_16_blocks_overflow_bgsqDFmekFAimag: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_bgsqDFmekFAimag: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 
98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iwszuhryhslDkgD subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iwszuhryhslDkgD .L_small_initial_partial_block_iwszuhryhslDkgD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iwszuhryhslDkgD: orq %r8,%r8 je .L_after_reduction_iwszuhryhslDkgD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iwszuhryhslDkgD: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_6_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_oaGuttEwoetbnjp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_oaGuttEwoetbnjp 
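// Counter-overflow path: %r15d appears to track the low byte of the
// big-endian counter, and the jae above is taken when the upcoming
// increments would carry out of that byte. The counter blocks are
// byte-reflected with the shuffle mask in %zmm29, stepped with the
// ddq_add_1234 / ddq_add_4444 constants, then reflected back.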
.L_16_blocks_overflow_oaGuttEwoetbnjp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_oaGuttEwoetbnjp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pulckbvkcxsatqu subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 
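// GHASH polynomial reduction (same pattern throughout this file): the 256-bit
// carry-less product is folded back to 128 bits with two multiplies by the
// POLY2 constant, then xor'd into the running hash in %xmm14.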
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pulckbvkcxsatqu .L_small_initial_partial_block_pulckbvkcxsatqu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pulckbvkcxsatqu: orq %r8,%r8 je .L_after_reduction_pulckbvkcxsatqu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pulckbvkcxsatqu: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_7_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_FvhiAqmdFpdFmlp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_FvhiAqmdFpdFmlp .L_16_blocks_overflow_FvhiAqmdFpdFmlp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_FvhiAqmdFpdFmlp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 
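// The raw .byte runs in this file are EVEX-encoded AVX-512 instructions written
// out byte-by-byte so the file still assembles with toolchains that lack AVX-512
// support: 98 (0x62) starts the EVEX prefix, opcode 68 (0x44) is vpclmulqdq, and
// 220/221 (0xdc/0xdd) are vaesenc/vaesenclast.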
.byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sqobqxAEFkeiGsu subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sqobqxAEFkeiGsu .L_small_initial_partial_block_sqobqxAEFkeiGsu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sqobqxAEFkeiGsu: orq %r8,%r8 je .L_after_reduction_sqobqxAEFkeiGsu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sqobqxAEFkeiGsu: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_8_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d 
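// 8-block tail: the compare above checks (by the pattern of these handlers)
// whether the low counter byte would wrap while adding 8 block increments;
// if so, the slower byte-swapped overflow path below is taken.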
jae .L_16_blocks_overflow_hwGtCmqmcvackpz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_hwGtCmqmcvackpz .L_16_blocks_overflow_hwGtCmqmcvackpz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_hwGtCmqmcvackpz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_itiiyBtdfcskbai subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_itiiyBtdfcskbai .L_small_initial_partial_block_itiiyBtdfcskbai: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_itiiyBtdfcskbai: orq %r8,%r8 je .L_after_reduction_itiiyBtdfcskbai vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_itiiyBtdfcskbai: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_9_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_DDnhmxjezrilein vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_DDnhmxjezrilein .L_16_blocks_overflow_DDnhmxjezrilein: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_DDnhmxjezrilein: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bBBEnlialjlpfsp subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bBBEnlialjlpfsp .L_small_initial_partial_block_bBBEnlialjlpfsp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 
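// Combine the per-lane vpclmulqdq partial products (high, low, and the two
// cross terms) before folding the 512-bit accumulators down to 128 bits.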
vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bBBEnlialjlpfsp: orq %r8,%r8 je .L_after_reduction_bBBEnlialjlpfsp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bBBEnlialjlpfsp: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_10_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_zCijhbGCeraapou vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_zCijhbGCeraapou .L_16_blocks_overflow_zCijhbGCeraapou: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_zCijhbGCeraapou: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DzFChhwqqhyhjhC subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DzFChhwqqhyhjhC .L_small_initial_partial_block_DzFChhwqqhyhjhC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DzFChhwqqhyhjhC: orq %r8,%r8 je .L_after_reduction_DzFChhwqqhyhjhC vpxorq 
%xmm7,%xmm14,%xmm14 .L_after_reduction_DzFChhwqqhyhjhC: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_11_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_aafwvnrniBpBhGh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_aafwvnrniBpBhGh .L_16_blocks_overflow_aafwvnrniBpBhGh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_aafwvnrniBpBhGh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_ntDAaiasAzzqzla subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ntDAaiasAzzqzla .L_small_initial_partial_block_ntDAaiasAzzqzla: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ntDAaiasAzzqzla: orq %r8,%r8 je .L_after_reduction_ntDAaiasAzzqzla vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ntDAaiasAzzqzla: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_12_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_szlfmGmeuofoAra vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_szlfmGmeuofoAra .L_16_blocks_overflow_szlfmGmeuofoAra: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 
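// 12-block tail body: the same skeleton as the smaller cases, but all three
// counter vectors are full 512-bit registers, so only the hash-key table
// offsets and the masked load/store of the third vector differ.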
.L_16_blocks_ok_szlfmGmeuofoAra: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FgCEuitmambDkxu subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq 
%zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FgCEuitmambDkxu .L_small_initial_partial_block_FgCEuitmambDkxu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FgCEuitmambDkxu: orq %r8,%r8 je .L_after_reduction_FgCEuitmambDkxu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FgCEuitmambDkxu: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_13_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_knBrwwsfezoBuDz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_knBrwwsfezoBuDz .L_16_blocks_overflow_knBrwwsfezoBuDz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_knBrwwsfezoBuDz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_szDFenAfBoEDgjz subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 
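// Merge the 13-block GHASH accumulators and reduce: fold 512 -> 256 -> 128
// bits via vextracti*/vpxorq, multiply by POLY2, and xor into %xmm14.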
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_szDFenAfBoEDgjz .L_small_initial_partial_block_szDFenAfBoEDgjz: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_szDFenAfBoEDgjz: orq %r8,%r8 je .L_after_reduction_szDFenAfBoEDgjz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_szDFenAfBoEDgjz: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_14_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_xfkAqxxGjDnhBjB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_xfkAqxxGjDnhBjB .L_16_blocks_overflow_xfkAqxxGjDnhBjB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_xfkAqxxGjDnhBjB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 
48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xpnwxzswluGFliu subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xpnwxzswluGFliu .L_small_initial_partial_block_xpnwxzswluGFliu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xpnwxzswluGFliu: orq %r8,%r8 je .L_after_reduction_xpnwxzswluGFliu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xpnwxzswluGFliu: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_15_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_myvDpkrqCoAukhb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_myvDpkrqCoAukhb .L_16_blocks_overflow_myvDpkrqCoAukhb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_myvDpkrqCoAukhb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jjDbyaqFmGmaiEB subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 
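// 15-block case: the final hash-key power at 240(%r10) was inserted as the top
// 128-bit lane above; from here the fold-and-reduce sequence matches the other
// tail handlers.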
vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jjDbyaqFmGmaiEB .L_small_initial_partial_block_jjDbyaqFmGmaiEB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jjDbyaqFmGmaiEB: orq %r8,%r8 je .L_after_reduction_jjDbyaqFmGmaiEB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jjDbyaqFmGmaiEB: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_16_CfrpfvcjvvrcbGa: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_zEAEoetgkvqojFa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_zEAEoetgkvqojFa .L_16_blocks_overflow_zEAEoetgkvqojFa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_zEAEoetgkvqojFa: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 
98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_erAoEayjDqpuhEu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 
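// The .byte runs throughout this file are EVEX encodings of vpclmulqdq,
// vaesenc, and vaesenclast emitted as raw bytes, presumably so assemblers
// without VPCLMULQDQ/VAES support can still build it; each cluster of four
// vpclmulqdq encodings computes the four 64x64-bit carry-less sub-products
// of one 128x128 GHASH multiply.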
.byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_erAoEayjDqpuhEu: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_erAoEayjDqpuhEu: jmp .L_last_blocks_done_CfrpfvcjvvrcbGa .L_last_num_blocks_is_0_CfrpfvcjvvrcbGa: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_CfrpfvcjvvrcbGa: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pzwgkGgbplFqzaB .L_encrypt_16_blocks_pzwgkGgbplFqzaB: cmpb $240,%r15b jae .L_16_blocks_overflow_rkcxrDqAhslhkiA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_rkcxrDqAhslhkiA .L_16_blocks_overflow_rkcxrDqAhslhkiA: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_rkcxrDqAhslhkiA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 
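// (Round-0 key whitening of the counter-block vectors continues here; zmm30
// holds the first round key from 0(%rdi), broadcast across the vector.)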
vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 .byte 
98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%rax movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_aibBfiDGEtrGszv cmpl $8,%r10d je .L_last_num_blocks_is_8_aibBfiDGEtrGszv jb .L_last_num_blocks_is_7_1_aibBfiDGEtrGszv cmpl $12,%r10d je .L_last_num_blocks_is_12_aibBfiDGEtrGszv jb .L_last_num_blocks_is_11_9_aibBfiDGEtrGszv cmpl $15,%r10d je .L_last_num_blocks_is_15_aibBfiDGEtrGszv ja .L_last_num_blocks_is_16_aibBfiDGEtrGszv cmpl $14,%r10d je .L_last_num_blocks_is_14_aibBfiDGEtrGszv jmp .L_last_num_blocks_is_13_aibBfiDGEtrGszv .L_last_num_blocks_is_11_9_aibBfiDGEtrGszv: cmpl $10,%r10d je .L_last_num_blocks_is_10_aibBfiDGEtrGszv ja .L_last_num_blocks_is_11_aibBfiDGEtrGszv jmp .L_last_num_blocks_is_9_aibBfiDGEtrGszv .L_last_num_blocks_is_7_1_aibBfiDGEtrGszv: cmpl $4,%r10d je .L_last_num_blocks_is_4_aibBfiDGEtrGszv jb .L_last_num_blocks_is_3_1_aibBfiDGEtrGszv cmpl $6,%r10d ja .L_last_num_blocks_is_7_aibBfiDGEtrGszv je .L_last_num_blocks_is_6_aibBfiDGEtrGszv jmp .L_last_num_blocks_is_5_aibBfiDGEtrGszv .L_last_num_blocks_is_3_1_aibBfiDGEtrGszv: cmpl $2,%r10d ja .L_last_num_blocks_is_3_aibBfiDGEtrGszv je .L_last_num_blocks_is_2_aibBfiDGEtrGszv .L_last_num_blocks_is_1_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_CfAjeyGwbnghnsF vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_CfAjeyGwbnghnsF .L_16_blocks_overflow_CfAjeyGwbnghnsF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_CfAjeyGwbnghnsF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 
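// One-block tail: the GHASH state of the preceding 16-block batch is folded
// and reduced here, interleaved with the AES rounds for the single remaining
// counter block (held at 128-bit width in xmm0).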
vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,8,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_EpCxqyApoFBApzn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EpCxqyApoFBApzn .L_small_initial_partial_block_EpCxqyApoFBApzn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_EpCxqyApoFBApzn .L_small_initial_compute_done_EpCxqyApoFBApzn: .L_after_reduction_EpCxqyApoFBApzn: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_2_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_sbkoxvmnmihnaig vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_sbkoxvmnmihnaig .L_16_blocks_overflow_sbkoxvmnmihnaig: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_sbkoxvmnmihnaig: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 
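// Two-block tail: same round schedule, with the state carried at 256-bit
// width; each vbroadcastf64x2 from (%rdi) replicates the next 128-bit round
// key across the vector.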
vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,40,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rlBeEnisjmybagx subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rlBeEnisjmybagx .L_small_initial_partial_block_rlBeEnisjmybagx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rlBeEnisjmybagx: orq %r8,%r8 je .L_after_reduction_rlBeEnisjmybagx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rlBeEnisjmybagx: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_3_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_zopCCjajxtsjEdG vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_zopCCjajxtsjEdG 
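// %r15d tracks the low byte of the big-endian counter. When it would wrap
// within this batch, the slow path below byte-swaps the counters
// (vpshufb %zmm29), increments them as little-endian dwords, and swaps back;
// otherwise a plain vpaddd on the big-endian form is safe.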
.L_16_blocks_overflow_zopCCjajxtsjEdG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_zopCCjajxtsjEdG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hzwxdhlzEAlznGG subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hzwxdhlzEAlznGG .L_small_initial_partial_block_hzwxdhlzEAlznGG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hzwxdhlzEAlznGG: orq %r8,%r8 je .L_after_reduction_hzwxdhlzEAlznGG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hzwxdhlzEAlznGG: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_4_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_utgfjaowycovqbp vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_utgfjaowycovqbp .L_16_blocks_overflow_utgfjaowycovqbp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_utgfjaowycovqbp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq 
%ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AslmndcqqeqAFer subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AslmndcqqeqAFer .L_small_initial_partial_block_AslmndcqqeqAFer: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AslmndcqqeqAFer: orq %r8,%r8 je .L_after_reduction_AslmndcqqeqAFer vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AslmndcqqeqAFer: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_5_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_wugoGjfryfqCjFa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_wugoGjfryfqCjFa .L_16_blocks_overflow_wugoGjfryfqCjFa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_wugoGjfryfqCjFa: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 
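// Interleaved GHASH of the previous batch: the H-key powers cached at
// 512..704(%rsp) during precomputation multiply the byte-reflected ciphertext
// blocks saved at 1280..1472(%rsp).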
vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CFkxkbxvkninECi subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CFkxkbxvkninECi .L_small_initial_partial_block_CFkxkbxvkninECi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CFkxkbxvkninECi: orq %r8,%r8 je .L_after_reduction_CFkxkbxvkninECi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CFkxkbxvkninECi: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_6_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_bpCexfjrkbCbhBc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_bpCexfjrkbCbhBc .L_16_blocks_overflow_bpCexfjrkbCbhBc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_bpCexfjrkbCbhBc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 
144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ojmsEGarpmywurj subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ojmsEGarpmywurj .L_small_initial_partial_block_ojmsEGarpmywurj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ojmsEGarpmywurj: orq %r8,%r8 je .L_after_reduction_ojmsEGarpmywurj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ojmsEGarpmywurj: jmp 
.L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_7_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_ifByzBizpdBxFnD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ifByzBizpdBxFnD .L_16_blocks_overflow_ifByzBizpdBxFnD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ifByzBizpdBxFnD: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_yEEshkytCfbpoyC subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yEEshkytCfbpoyC .L_small_initial_partial_block_yEEshkytCfbpoyC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yEEshkytCfbpoyC: orq %r8,%r8 je .L_after_reduction_yEEshkytCfbpoyC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yEEshkytCfbpoyC: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_8_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_cjwhqEvpCfjCcEa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_cjwhqEvpCfjCcEa .L_16_blocks_overflow_cjwhqEvpCfjCcEa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_cjwhqEvpCfjCcEa: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 
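// Eight-block tail: two full zmm vectors of counter blocks run through the
// AES rounds while the vpclmulqdq chain digests the previous batch.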
vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EqvthrGbiBgAmsm subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_EqvthrGbiBgAmsm .L_small_initial_partial_block_EqvthrGbiBgAmsm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EqvthrGbiBgAmsm: orq %r8,%r8 je .L_after_reduction_EqvthrGbiBgAmsm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EqvthrGbiBgAmsm: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_9_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_xiomBjDmsdhvtig vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_xiomBjDmsdhvtig .L_16_blocks_overflow_xiomBjDmsdhvtig: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_xiomBjDmsdhvtig: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 
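// Nine-block tail: the ninth block rides in xmm4 and is stepped through the
// same rounds at 128-bit width alongside the two full zmm vectors.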
.byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mbfjpvagktvcgbq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mbfjpvagktvcgbq .L_small_initial_partial_block_mbfjpvagktvcgbq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 
98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mbfjpvagktvcgbq: orq %r8,%r8 je .L_after_reduction_mbfjpvagktvcgbq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mbfjpvagktvcgbq: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_10_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_cEyikykuFcExlBe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_cEyikykuFcExlBe .L_16_blocks_overflow_cEyikykuFcExlBe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_cEyikykuFcExlBe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 
vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zkabbaDExfgmaqw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zkabbaDExfgmaqw .L_small_initial_partial_block_zkabbaDExfgmaqw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 
vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zkabbaDExfgmaqw: orq %r8,%r8 je .L_after_reduction_zkabbaDExfgmaqw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zkabbaDExfgmaqw: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_11_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_gsBoGfzrmwqlomo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_gsBoGfzrmwqlomo .L_16_blocks_overflow_gsBoGfzrmwqlomo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_gsBoGfzrmwqlomo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq 
$8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jaixjmwppjCmscj subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jaixjmwppjCmscj .L_small_initial_partial_block_jaixjmwppjCmscj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 
vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jaixjmwppjCmscj: orq %r8,%r8 je .L_after_reduction_jaixjmwppjCmscj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jaixjmwppjCmscj: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_12_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_CAvgqgqjrtonFws vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_CAvgqgqjrtonFws .L_16_blocks_overflow_CAvgqgqjrtonFws: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_CAvgqgqjrtonFws: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 
.byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hvmFFygfifAjAnG subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hvmFFygfifAjAnG .L_small_initial_partial_block_hvmFFygfifAjAnG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hvmFFygfifAjAnG: orq %r8,%r8 je .L_after_reduction_hvmFFygfifAjAnG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hvmFFygfifAjAnG: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_13_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_zqBffksAbxFoiFr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_zqBffksAbxFoiFr .L_16_blocks_overflow_zqBffksAbxFoiFr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_zqBffksAbxFoiFr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 
144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kmvbbtEzBEoeAuq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kmvbbtEzBEoeAuq .L_small_initial_partial_block_kmvbbtEzBEoeAuq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 
vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kmvbbtEzBEoeAuq: orq %r8,%r8 je .L_after_reduction_kmvbbtEzBEoeAuq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kmvbbtEzBEoeAuq: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_14_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_mBiifnhuGFDpfDy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_mBiifnhuGFDpfDy .L_16_blocks_overflow_mBiifnhuGFDpfDy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_mBiifnhuGFDpfDy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 
.byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_imGnxqypsDyhyek subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_imGnxqypsDyhyek .L_small_initial_partial_block_imGnxqypsDyhyek: movl %r8d,(%rdx) 
vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_imGnxqypsDyhyek: orq %r8,%r8 je .L_after_reduction_imGnxqypsDyhyek vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_imGnxqypsDyhyek: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_15_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_zDGlqyFvuaglkeB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_zDGlqyFvuaglkeB .L_16_blocks_overflow_zDGlqyFvuaglkeB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_zDGlqyFvuaglkeB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BuGprjrzjxrmorl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq 
%zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BuGprjrzjxrmorl .L_small_initial_partial_block_BuGprjrzjxrmorl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BuGprjrzjxrmorl: orq %r8,%r8 je .L_after_reduction_BuGprjrzjxrmorl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BuGprjrzjxrmorl: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_16_aibBfiDGEtrGszv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_uwtqqfwgewBdjhg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_uwtqqfwgewBdjhg .L_16_blocks_overflow_uwtqqfwgewBdjhg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_uwtqqfwgewBdjhg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 
98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_ifytbdtuElzEqkG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 
98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ifytbdtuElzEqkG: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ifytbdtuElzEqkG: jmp .L_last_blocks_done_aibBfiDGEtrGszv .L_last_num_blocks_is_0_aibBfiDGEtrGszv: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_aibBfiDGEtrGszv: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pzwgkGgbplFqzaB .L_message_below_32_blocks_pzwgkGgbplFqzaB: subq $256,%r8 addq $256,%rax movl %r8d,%r10d leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_mgjxphyGhnqeEta vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 
.byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_mgjxphyGhnqeEta: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_FjCtcrwcdAhCtrr cmpl $8,%r10d je .L_last_num_blocks_is_8_FjCtcrwcdAhCtrr jb .L_last_num_blocks_is_7_1_FjCtcrwcdAhCtrr cmpl $12,%r10d je .L_last_num_blocks_is_12_FjCtcrwcdAhCtrr jb .L_last_num_blocks_is_11_9_FjCtcrwcdAhCtrr cmpl $15,%r10d je .L_last_num_blocks_is_15_FjCtcrwcdAhCtrr ja .L_last_num_blocks_is_16_FjCtcrwcdAhCtrr cmpl $14,%r10d je .L_last_num_blocks_is_14_FjCtcrwcdAhCtrr jmp .L_last_num_blocks_is_13_FjCtcrwcdAhCtrr .L_last_num_blocks_is_11_9_FjCtcrwcdAhCtrr: cmpl $10,%r10d je .L_last_num_blocks_is_10_FjCtcrwcdAhCtrr ja .L_last_num_blocks_is_11_FjCtcrwcdAhCtrr jmp .L_last_num_blocks_is_9_FjCtcrwcdAhCtrr .L_last_num_blocks_is_7_1_FjCtcrwcdAhCtrr: cmpl $4,%r10d je .L_last_num_blocks_is_4_FjCtcrwcdAhCtrr jb .L_last_num_blocks_is_3_1_FjCtcrwcdAhCtrr cmpl $6,%r10d ja .L_last_num_blocks_is_7_FjCtcrwcdAhCtrr je .L_last_num_blocks_is_6_FjCtcrwcdAhCtrr jmp .L_last_num_blocks_is_5_FjCtcrwcdAhCtrr .L_last_num_blocks_is_3_1_FjCtcrwcdAhCtrr: cmpl $2,%r10d ja .L_last_num_blocks_is_3_FjCtcrwcdAhCtrr je .L_last_num_blocks_is_2_FjCtcrwcdAhCtrr .L_last_num_blocks_is_1_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_zCjdttbyboeGxFb vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_zCjdttbyboeGxFb .L_16_blocks_overflow_zCjdttbyboeGxFb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb 
%xmm29,%xmm0,%xmm0 .L_16_blocks_ok_zCjdttbyboeGxFb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_ojiwxsAElGDCCBo subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ojiwxsAElGDCCBo .L_small_initial_partial_block_ojiwxsAElGDCCBo: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_ojiwxsAElGDCCBo .L_small_initial_compute_done_ojiwxsAElGDCCBo: .L_after_reduction_ojiwxsAElGDCCBo: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_2_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_fhFvhqpaozkgyzE vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_fhFvhqpaozkgyzE .L_16_blocks_overflow_fhFvhqpaozkgyzE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_fhFvhqpaozkgyzE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_poknuzddusxymkw subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_poknuzddusxymkw .L_small_initial_partial_block_poknuzddusxymkw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_poknuzddusxymkw: orq %r8,%r8 je .L_after_reduction_poknuzddusxymkw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_poknuzddusxymkw: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_3_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_hjBmpccGhruhCnv vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_hjBmpccGhruhCnv .L_16_blocks_overflow_hjBmpccGhruhCnv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_hjBmpccGhruhCnv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_yshcwAFsbqgougy subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yshcwAFsbqgougy .L_small_initial_partial_block_yshcwAFsbqgougy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yshcwAFsbqgougy: orq %r8,%r8 je .L_after_reduction_yshcwAFsbqgougy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yshcwAFsbqgougy: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_4_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_bBrsEuBDcsAcscn vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_bBrsEuBDcsAcscn .L_16_blocks_overflow_bBrsEuBDcsAcscn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_bBrsEuBDcsAcscn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bDghuGEnDqEshwp subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bDghuGEnDqEshwp .L_small_initial_partial_block_bDghuGEnDqEshwp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bDghuGEnDqEshwp: orq %r8,%r8 je .L_after_reduction_bDghuGEnDqEshwp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bDghuGEnDqEshwp: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_5_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_nygdGeFptfwzvpw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_nygdGeFptfwzvpw .L_16_blocks_overflow_nygdGeFptfwzvpw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_nygdGeFptfwzvpw: 
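// Tail handler for five remaining blocks. The guard above appears to
// increment the counter with plain vpaddd while the low counter byte
// cannot wrap, falling back to byte-swapped arithmetic (vpshufb with
// %zmm29 plus the ddq_add_1234/ddq_add_4444 constants) when it would.
// Below, AES rounds over the counter blocks in %zmm0/%xmm3 (eleven
// round keys broadcast from 0..160(%rdi), consistent with an AES-128
// schedule) are interleaved with GHASH accumulation of the sixteen
// blocks the previous pass left at 768..960(%rsp), multiplied by the
// H-powers cached at 0..192(%rsp,%rbx,1).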
vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dFGmpkoEnwhmCiq subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dFGmpkoEnwhmCiq 
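// The path above finishes when the tail ends on a block boundary; the
// label below appears to take over for a trailing partial block: the
// residual byte count is recorded through (%rdx) and what looks like
// the final ciphertext block through 16(%rsi), so that block's GHASH
// contribution can be completed later, and %xmm7 is folded into the
// running hash afterwards only while %r8 is still non-zero.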
.L_small_initial_partial_block_dFGmpkoEnwhmCiq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dFGmpkoEnwhmCiq: orq %r8,%r8 je .L_after_reduction_dFGmpkoEnwhmCiq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dFGmpkoEnwhmCiq: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_6_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_adtbeheumiAkmlw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_adtbeheumiAkmlw .L_16_blocks_overflow_adtbeheumiAkmlw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_adtbeheumiAkmlw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 
98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nDqCwyzuFDuivbj subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nDqCwyzuFDuivbj .L_small_initial_partial_block_nDqCwyzuFDuivbj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nDqCwyzuFDuivbj: orq %r8,%r8 je .L_after_reduction_nDqCwyzuFDuivbj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nDqCwyzuFDuivbj: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_7_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_aDdoAskralEtovy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_aDdoAskralEtovy .L_16_blocks_overflow_aDdoAskralEtovy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_aDdoAskralEtovy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 
0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GmgCerxizidGGeG subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GmgCerxizidGGeG .L_small_initial_partial_block_GmgCerxizidGGeG: 
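// Sketch of the reduction both tail paths funnel into: the 512-bit
// product accumulators are folded with vpsrldq/vpslldq so the middle
// partial product splits across the high and low halves, collapsed to
// 128 bits lane by lane via vextracti64x4/vextracti32x4 and XORs, and
// finally reduced modulo the GHASH polynomial against the POLY2(%rip)
// constant: the .byte-encoded vpclmulqdq $0x01/$0x00/$0x10 steps that
// follow.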
movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GmgCerxizidGGeG: orq %r8,%r8 je .L_after_reduction_GmgCerxizidGGeG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GmgCerxizidGGeG: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_8_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_hjBdmnrbjjzAbzC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_hjBdmnrbjjzAbzC .L_16_blocks_overflow_hjBdmnrbjjzAbzC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_hjBdmnrbjjzAbzC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq 
%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_oukGaFAnaceFaaB subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oukGaFAnaceFaaB .L_small_initial_partial_block_oukGaFAnaceFaaB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_oukGaFAnaceFaaB: orq %r8,%r8 je .L_after_reduction_oukGaFAnaceFaaB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_oukGaFAnaceFaaB: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_9_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_szBmuqzxwjxBawF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_szBmuqzxwjxBawF .L_16_blocks_overflow_szBmuqzxwjxBawF: vpshufb 
%zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_szBmuqzxwjxBawF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_paBklhesgEuGBAF subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 
.byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_paBklhesgEuGBAF .L_small_initial_partial_block_paBklhesgEuGBAF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_paBklhesgEuGBAF: orq %r8,%r8 je .L_after_reduction_paBklhesgEuGBAF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_paBklhesgEuGBAF: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_10_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_xhlcvtlyGczsicp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_xhlcvtlyGczsicp .L_16_blocks_overflow_xhlcvtlyGczsicp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_xhlcvtlyGczsicp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 
98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nytpeiDsozzjuGs subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nytpeiDsozzjuGs 
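// Each handler stages 80(%rsi), likely the base of the precomputed
// H-power table, into %r10: a tail of N blocks starts its key loads at
// 256 - 16*N bytes into that table, so the products against H^N..H^1
// can be accumulated together and reduced once rather than per block.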
.L_small_initial_partial_block_nytpeiDsozzjuGs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nytpeiDsozzjuGs: orq %r8,%r8 je .L_after_reduction_nytpeiDsozzjuGs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nytpeiDsozzjuGs: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_11_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_CkhBiupnDlzBoGx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_CkhBiupnDlzBoGx .L_16_blocks_overflow_CkhBiupnDlzBoGx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_CkhBiupnDlzBoGx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 
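// The raw .byte runs here appear to be EVEX encodings emitted directly
// for assembler compatibility: opcode byte 68 (0x44) with a trailing
// 0/1/16/17 immediate is vpclmulqdq, while 220 (0xDC) and 221 (0xDD)
// are vaesenc and vaesenclast over the ZMM counter blocks.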
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BsdepnxnqoCzhkf subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BsdepnxnqoCzhkf .L_small_initial_partial_block_BsdepnxnqoCzhkf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BsdepnxnqoCzhkf: orq %r8,%r8 je .L_after_reduction_BsdepnxnqoCzhkf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BsdepnxnqoCzhkf: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_12_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_svvcxnisrDiilsD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_svvcxnisrDiilsD .L_16_blocks_overflow_svvcxnisrDiilsD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_svvcxnisrDiilsD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 
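// %k1 here was loaded from byte64_len_to_mask_table at handler entry,
// indexed by the residual length minus the bytes already covered by
// full ZMM loads (%r8 - 128 in this handler), so the {%k1}{z} loads
// and stores appear to touch only the live bytes of the final,
// possibly partial vector and nothing past the end of the buffer.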
vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vFAcldEivdmCjng subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vFAcldEivdmCjng .L_small_initial_partial_block_vFAcldEivdmCjng: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vFAcldEivdmCjng: orq 
%r8,%r8 je .L_after_reduction_vFAcldEivdmCjng vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vFAcldEivdmCjng: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_13_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_oDDmorFzihnoffg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_oDDmorFzihnoffg .L_16_blocks_overflow_oDDmorFzihnoffg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_oDDmorFzihnoffg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yccbCzjnDwADEak subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yccbCzjnDwADEak .L_small_initial_partial_block_yccbCzjnDwADEak: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yccbCzjnDwADEak: orq %r8,%r8 je .L_after_reduction_yccbCzjnDwADEak vpxorq %xmm7,%xmm14,%xmm14 
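// The GHASH state for the 13-block case is now reduced into %xmm14; the
// byte-reflected trailing block (%xmm7), if present, was folded in just above.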
.L_after_reduction_yccbCzjnDwADEak: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_14_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_liipuseeafvnkfi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_liipuseeafvnkfi .L_16_blocks_overflow_liipuseeafvnkfi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_liipuseeafvnkfi: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BentjlpjfFDzvxb subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BentjlpjfFDzvxb .L_small_initial_partial_block_BentjlpjfFDzvxb: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
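// vpternlogq with immediate 0x96 is a three-way XOR; it completes the
// polynomial reduction, leaving the folded GHASH accumulator in %xmm14.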
.L_small_initial_compute_done_BentjlpjfFDzvxb: orq %r8,%r8 je .L_after_reduction_BentjlpjfFDzvxb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BentjlpjfFDzvxb: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_15_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_cuygxmuthGeaeby vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_cuygxmuthGeaeby .L_16_blocks_overflow_cuygxmuthGeaeby: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_cuygxmuthGeaeby: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 
.byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qokhdigphzzzcxp subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qokhdigphzzzcxp .L_small_initial_partial_block_qokhdigphzzzcxp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qokhdigphzzzcxp: orq %r8,%r8 je .L_after_reduction_qokhdigphzzzcxp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qokhdigphzzzcxp: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_16_FjCtcrwcdAhCtrr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_AgkAgztElEpGqer vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_AgkAgztElEpGqer .L_16_blocks_overflow_AgkAgztElEpGqer: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_AgkAgztElEpGqer: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_eruDeitqttsEEhG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eruDeitqttsEEhG: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eruDeitqttsEEhG: jmp .L_last_blocks_done_FjCtcrwcdAhCtrr .L_last_num_blocks_is_0_FjCtcrwcdAhCtrr: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 
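// Below: the four 128-bit lanes of the product accumulators are folded
// together, then reduced modulo the GHASH polynomial using the POLY2 constant
// and byte-encoded vpclmulqdq, leaving the result in %xmm14.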
vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_FjCtcrwcdAhCtrr: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_pzwgkGgbplFqzaB .L_message_below_equal_16_blocks_pzwgkGgbplFqzaB: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_Arjlgemsqpaxhfj jl .L_small_initial_num_blocks_is_7_1_Arjlgemsqpaxhfj cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_Arjlgemsqpaxhfj jl .L_small_initial_num_blocks_is_11_9_Arjlgemsqpaxhfj cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_Arjlgemsqpaxhfj cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_Arjlgemsqpaxhfj cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_Arjlgemsqpaxhfj jmp .L_small_initial_num_blocks_is_13_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_11_9_Arjlgemsqpaxhfj: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_Arjlgemsqpaxhfj cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_Arjlgemsqpaxhfj jmp .L_small_initial_num_blocks_is_9_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_7_1_Arjlgemsqpaxhfj: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_Arjlgemsqpaxhfj jl .L_small_initial_num_blocks_is_3_1_Arjlgemsqpaxhfj cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_Arjlgemsqpaxhfj cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_Arjlgemsqpaxhfj jmp .L_small_initial_num_blocks_is_5_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_3_1_Arjlgemsqpaxhfj: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_Arjlgemsqpaxhfj cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_1_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONEa(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%rax,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,8,221,199 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_qFFkbngiCspnnzb subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq 
%xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qFFkbngiCspnnzb .L_small_initial_partial_block_qFFkbngiCspnnzb: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_qFFkbngiCspnnzb .L_small_initial_compute_done_qFFkbngiCspnnzb: .L_after_reduction_qFFkbngiCspnnzb: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_2_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%rax,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,40,221,199 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vcznqnCBEluErfz subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vcznqnCBEluErfz .L_small_initial_partial_block_vcznqnCBEluErfz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vcznqnCBEluErfz: orq %r8,%r8 je .L_after_reduction_vcznqnCBEluErfz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_vcznqnCBEluErfz: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_3_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lxlwCnecElggboh subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lxlwCnecElggboh .L_small_initial_partial_block_lxlwCnecElggboh: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lxlwCnecElggboh: orq %r8,%r8 je .L_after_reduction_lxlwCnecElggboh vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_lxlwCnecElggboh: jmp 
.L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_4_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uwbAugwxtaEtqkm subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uwbAugwxtaEtqkm .L_small_initial_partial_block_uwbAugwxtaEtqkm: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uwbAugwxtaEtqkm: orq %r8,%r8 je .L_after_reduction_uwbAugwxtaEtqkm vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_uwbAugwxtaEtqkm: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_5_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq 
(%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,8,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DlAbwtibuwDuckF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DlAbwtibuwDuckF .L_small_initial_partial_block_DlAbwtibuwDuckF: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DlAbwtibuwDuckF: orq %r8,%r8 je .L_after_reduction_DlAbwtibuwDuckF vpxorq 
%xmm13,%xmm14,%xmm14 .L_after_reduction_DlAbwtibuwDuckF: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_6_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,40,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bGfevqujtGrmyqw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bGfevqujtGrmyqw .L_small_initial_partial_block_bGfevqujtGrmyqw: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bGfevqujtGrmyqw: orq %r8,%r8 je .L_after_reduction_bGfevqujtGrmyqw vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_bGfevqujtGrmyqw: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_7_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BDbECEkpAEccDln subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BDbECEkpAEccDln .L_small_initial_partial_block_BDbECEkpAEccDln: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BDbECEkpAEccDln: orq %r8,%r8 je .L_after_reduction_BDbECEkpAEccDln vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_BDbECEkpAEccDln: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_8_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ngynpdbFzwtiwpp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 
vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ngynpdbFzwtiwpp .L_small_initial_partial_block_ngynpdbFzwtiwpp: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ngynpdbFzwtiwpp: orq %r8,%r8 je .L_after_reduction_ngynpdbFzwtiwpp vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ngynpdbFzwtiwpp: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_9_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 
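// Each vbroadcastf64x2 replicates one 16-byte AES round key into all
// four 128-bit lanes of %zmm15; the encoded vaesenc instructions
// around it then apply that round to four counter blocks per register.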
.byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,8,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bxCwlFCulijpvoi subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bxCwlFCulijpvoi .L_small_initial_partial_block_bxCwlFCulijpvoi: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bxCwlFCulijpvoi: orq %r8,%r8 je .L_after_reduction_bxCwlFCulijpvoi vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_bxCwlFCulijpvoi: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_10_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd 
ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,40,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kAlkoicirsyCsoA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kAlkoicirsyCsoA .L_small_initial_partial_block_kAlkoicirsyCsoA: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 
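// Partial-block bookkeeping (as in the other small-block paths): the
// remaining byte count was just stored at (%rdx), the last ciphertext
// block (%xmm12) saved at 16(%rsi), and the running GHASH accumulator
// (%xmm14) folded into the first data block before the multiply by
// the shifted powers of H that follows.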
vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kAlkoicirsyCsoA: orq %r8,%r8 je .L_after_reduction_kAlkoicirsyCsoA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_kAlkoicirsyCsoA: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_11_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb 
%zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rsxtmscApkaFsGk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rsxtmscApkaFsGk .L_small_initial_partial_block_rsxtmscApkaFsGk: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rsxtmscApkaFsGk: orq %r8,%r8 je .L_after_reduction_rsxtmscApkaFsGk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_rsxtmscApkaFsGk: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_12_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 
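// Twelve-block path: %rcx is the input pointer, %r9 (via %r10) the
// output pointer and %rax the running byte offset; three 64-byte zmm
// loads cover the twelve blocks, with %k1 masking the tail of the
// third load.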
vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uytuqlquheEjDpf subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uytuqlquheEjDpf .L_small_initial_partial_block_uytuqlquheEjDpf: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq 
%zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uytuqlquheEjDpf: orq %r8,%r8 je .L_after_reduction_uytuqlquheEjDpf vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_uytuqlquheEjDpf: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_13_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,8,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 
%xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AEjkiAmqCDcyaGF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AEjkiAmqCDcyaGF .L_small_initial_partial_block_AEjkiAmqCDcyaGF: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AEjkiAmqCDcyaGF: orq %r8,%r8 je .L_after_reduction_AEjkiAmqCDcyaGF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_AEjkiAmqCDcyaGF: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_14_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd 
ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,40,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xweDbtGBgzaynjE subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 
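// GHASH reduction, first phase: XOR the Karatsuba partial products
// together, then fold the 512-bit accumulators horizontally
// (zmm -> ymm -> xmm) before the carry-less reduction against POLY2.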
vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xweDbtGBgzaynjE .L_small_initial_partial_block_xweDbtGBgzaynjE: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xweDbtGBgzaynjE: orq %r8,%r8 je .L_after_reduction_xweDbtGBgzaynjE vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_xweDbtGBgzaynjE: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_15_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 
80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ffBpbkEzFalCAqm subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ffBpbkEzFalCAqm .L_small_initial_partial_block_ffBpbkEzFalCAqm: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 
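// vpternlogq with immediate 0x96 implements a three-way XOR
// (dst ^= src1 ^ src2), merging two vpxorq operations into a single
// instruction in these accumulation steps.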
vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ffBpbkEzFalCAqm: orq %r8,%r8 je .L_after_reduction_ffBpbkEzFalCAqm vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ffBpbkEzFalCAqm: jmp .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj .L_small_initial_num_blocks_is_16_Arjlgemsqpaxhfj: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 
%zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_icDnnqvChyBsuli: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_icDnnqvChyBsuli: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_icDnnqvChyBsuli: .L_small_initial_blocks_encrypted_Arjlgemsqpaxhfj: .L_ghash_done_pzwgkGgbplFqzaB: vmovdqu64 %xmm2,0(%rsi) .L_enc_dec_done_pzwgkGgbplFqzaB: vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_abort_pzwgkGgbplFqzaB: jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_192_avx512: orq %r8,%r8 je .L_enc_dec_abort_jzxBnczDBxGvzop xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 movl (%rdx),%eax orq %rax,%rax je .L_partial_block_done_CoBypAsApBwqcnx movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 leaq 80(%rsi),%r10 vmovdqu64 240(%r10),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %rax,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%rax,1),%r13 subq $16,%r13 jge .L_no_extra_mask_CoBypAsApBwqcnx subq %r13,%r12 .L_no_extra_mask_CoBypAsApBwqcnx: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_CoBypAsApBwqcnx .byte 98,243,13,8,68,252,17 .byte 98,115,13,8,68,212,0 .byte 98,115,13,8,68,220,1 .byte 98,115,13,8,68,244,16 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 .byte 98,83,37,8,68,214,1 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 .byte 98,83,37,8,68,214,0 vpsrldq $4,%xmm10,%xmm10 .byte 98,83,37,8,68,246,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm10,%xmm7,%xmm14 movl $0,(%rdx) movq %rax,%r12 movq $16,%rax subq %r12,%rax jmp .L_enc_dec_done_CoBypAsApBwqcnx .L_partial_incomplete_CoBypAsApBwqcnx: addl %r8d,(%rdx) movq %r8,%rax .L_enc_dec_done_CoBypAsApBwqcnx: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%rax,2),%k1 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_CoBypAsApBwqcnx: vmovdqu64 0(%rsi),%xmm2 subq %rax,%r8 je .L_enc_dec_done_jzxBnczDBxGvzop cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_jzxBnczDBxGvzop vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_lelEEvckqsGkuGn vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_lelEEvckqsGkuGn .L_next_16_overflow_lelEEvckqsGkuGn: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_lelEEvckqsGkuGn: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%rax,1),%zmm0 vmovdqu8 64(%rcx,%rax,1),%zmm3 vmovdqu8 128(%rcx,%rax,1),%zmm4 vmovdqu8 192(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 176(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 192(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%rax,1) vmovdqu8 %zmm10,64(%r10,%rax,1) vmovdqu8 %zmm11,128(%r10,%rax,1) vmovdqu8 
%zmm12,192(%r10,%rax,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_EclAduckuFhozAl vmovdqu64 192(%r12),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 128(%r12),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 64(%r12),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 0(%r12),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_EclAduckuFhozAl: cmpq $512,%r8 jb .L_message_below_32_blocks_jzxBnczDBxGvzop cmpb $240,%r15b jae .L_next_16_overflow_hzduBGFfzuzeflu vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_hzduBGFfzuzeflu .L_next_16_overflow_hzduBGFfzuzeflu: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_hzduBGFfzuzeflu: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%rax,1),%zmm0 vmovdqu8 320(%rcx,%rax,1),%zmm3 vmovdqu8 384(%rcx,%rax,1),%zmm4 vmovdqu8 448(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 176(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 192(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%rax,1) vmovdqu8 %zmm10,320(%r10,%rax,1) vmovdqu8 %zmm11,384(%r10,%rax,1) vmovdqu8 %zmm12,448(%r10,%rax,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb 
%zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_vFCorhCAmhdDCzm vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq 
$8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_vFCorhCAmhdDCzm: movq $1,%r14 addq $512,%rax subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_jzxBnczDBxGvzop .L_encrypt_big_nblocks_jzxBnczDBxGvzop: cmpb $240,%r15b jae .L_16_blocks_overflow_tbpqxctvntvnomu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_tbpqxctvntvnomu .L_16_blocks_overflow_tbpqxctvntvnomu: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_tbpqxctvntvnomu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_oaDubdDhvdaaGvl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_oaDubdDhvdaaGvl .L_16_blocks_overflow_oaDubdDhvdaaGvl: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_oaDubdDhvdaaGvl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 
256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_BEBEkieDehCjfpg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_BEBEkieDehCjfpg .L_16_blocks_overflow_BEBEkieDehCjfpg: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_BEBEkieDehCjfpg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 
98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%rax,1),%zmm17 vmovdqu8 576(%rcx,%rax,1),%zmm19 vmovdqu8 640(%rcx,%rax,1),%zmm20 vmovdqu8 704(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%rax,1) vmovdqu8 %zmm3,576(%r10,%rax,1) vmovdqu8 %zmm4,640(%r10,%rax,1) vmovdqu8 %zmm5,704(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%rax subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_jzxBnczDBxGvzop .L_no_more_big_nblocks_jzxBnczDBxGvzop: cmpq $512,%r8 jae .L_encrypt_32_blocks_jzxBnczDBxGvzop cmpq $256,%r8 jae .L_encrypt_16_blocks_jzxBnczDBxGvzop .L_encrypt_0_blocks_ghash_32_jzxBnczDBxGvzop: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 
832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_BsvdFlCzDbBougk cmpl $8,%r10d je .L_last_num_blocks_is_8_BsvdFlCzDbBougk jb .L_last_num_blocks_is_7_1_BsvdFlCzDbBougk cmpl $12,%r10d je .L_last_num_blocks_is_12_BsvdFlCzDbBougk jb .L_last_num_blocks_is_11_9_BsvdFlCzDbBougk cmpl $15,%r10d je .L_last_num_blocks_is_15_BsvdFlCzDbBougk ja .L_last_num_blocks_is_16_BsvdFlCzDbBougk cmpl $14,%r10d je .L_last_num_blocks_is_14_BsvdFlCzDbBougk jmp .L_last_num_blocks_is_13_BsvdFlCzDbBougk .L_last_num_blocks_is_11_9_BsvdFlCzDbBougk: cmpl $10,%r10d je .L_last_num_blocks_is_10_BsvdFlCzDbBougk ja .L_last_num_blocks_is_11_BsvdFlCzDbBougk jmp .L_last_num_blocks_is_9_BsvdFlCzDbBougk .L_last_num_blocks_is_7_1_BsvdFlCzDbBougk: cmpl $4,%r10d je .L_last_num_blocks_is_4_BsvdFlCzDbBougk jb .L_last_num_blocks_is_3_1_BsvdFlCzDbBougk cmpl $6,%r10d ja .L_last_num_blocks_is_7_BsvdFlCzDbBougk je .L_last_num_blocks_is_6_BsvdFlCzDbBougk jmp .L_last_num_blocks_is_5_BsvdFlCzDbBougk .L_last_num_blocks_is_3_1_BsvdFlCzDbBougk: cmpl $2,%r10d ja .L_last_num_blocks_is_3_BsvdFlCzDbBougk je .L_last_num_blocks_is_2_BsvdFlCzDbBougk .L_last_num_blocks_is_1_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_czjqmrcuGbkhjtu vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_czjqmrcuGbkhjtu .L_16_blocks_overflow_czjqmrcuGbkhjtu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_czjqmrcuGbkhjtu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 
112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_kFnqfsluDrycrwr subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kFnqfsluDrycrwr .L_small_initial_partial_block_kFnqfsluDrycrwr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_kFnqfsluDrycrwr .L_small_initial_compute_done_kFnqfsluDrycrwr: .L_after_reduction_kFnqfsluDrycrwr: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_2_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_tCDuaqxntEtBCqr vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_tCDuaqxntEtBCqr .L_16_blocks_overflow_tCDuaqxntEtBCqr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_tCDuaqxntEtBCqr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 
98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nrarlmFvApvbzxy subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nrarlmFvApvbzxy .L_small_initial_partial_block_nrarlmFvApvbzxy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nrarlmFvApvbzxy: orq %r8,%r8 je 
.L_after_reduction_nrarlmFvApvbzxy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nrarlmFvApvbzxy: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_3_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_AxfvkflbDBEFEmp vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_AxfvkflbDBEFEmp .L_16_blocks_overflow_AxfvkflbDBEFEmp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_AxfvkflbDBEFEmp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wasBAmmrjbGbemo subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wasBAmmrjbGbemo .L_small_initial_partial_block_wasBAmmrjbGbemo: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wasBAmmrjbGbemo: orq %r8,%r8 je .L_after_reduction_wasBAmmrjbGbemo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wasBAmmrjbGbemo: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_4_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_xkpgotEfuidCEnC vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_xkpgotEfuidCEnC .L_16_blocks_overflow_xkpgotEfuidCEnC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_xkpgotEfuidCEnC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq 
%r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DvcssyjwzrqmFlE subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DvcssyjwzrqmFlE .L_small_initial_partial_block_DvcssyjwzrqmFlE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DvcssyjwzrqmFlE: orq %r8,%r8 je .L_after_reduction_DvcssyjwzrqmFlE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DvcssyjwzrqmFlE: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_5_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_pxAyyxhuewraobh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_pxAyyxhuewraobh .L_16_blocks_overflow_pxAyyxhuewraobh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_pxAyyxhuewraobh: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 
1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Eawjwfemrjotopq subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Eawjwfemrjotopq .L_small_initial_partial_block_Eawjwfemrjotopq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Eawjwfemrjotopq: orq %r8,%r8 je .L_after_reduction_Eawjwfemrjotopq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Eawjwfemrjotopq: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_6_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_rlBkdasaFkzjByu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_rlBkdasaFkzjByu .L_16_blocks_overflow_rlBkdasaFkzjByu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_rlBkdasaFkzjByu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb 
%zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AesejBFGrhphEgi subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AesejBFGrhphEgi .L_small_initial_partial_block_AesejBFGrhphEgi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AesejBFGrhphEgi: orq %r8,%r8 je .L_after_reduction_AesejBFGrhphEgi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AesejBFGrhphEgi: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_7_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_keqkskoubnuElfA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_keqkskoubnuElfA .L_16_blocks_overflow_keqkskoubnuElfA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_keqkskoubnuElfA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 
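// NOTE: the .byte runs in these 16-block passes appear to be EVEX-encoded
// vaesenc/vaesenclast (opcodes 0xDC/0xDD) and vpclmulqdq (opcode 0x44)
// instructions emitted as raw bytes for assemblers without AVX-512 support
// (cf. the MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX guard above); the trailing
// immediates 0x11/0x00/0x01/0x10 select the four 64x64-bit GHASH partial
// products, so each pass overlaps the AES rounds on fresh counter blocks
// with GHASH folds of the previously saved blocks.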
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BxDABaeeqkhilCj subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BxDABaeeqkhilCj .L_small_initial_partial_block_BxDABaeeqkhilCj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 
98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BxDABaeeqkhilCj: orq %r8,%r8 je .L_after_reduction_BxDABaeeqkhilCj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BxDABaeeqkhilCj: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_8_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_hwCFDDlqwBqrdyx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_hwCFDDlqwBqrdyx .L_16_blocks_overflow_hwCFDDlqwBqrdyx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_hwCFDDlqwBqrdyx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BeuuFnmEliqBmCs subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BeuuFnmEliqBmCs .L_small_initial_partial_block_BeuuFnmEliqBmCs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BeuuFnmEliqBmCs: orq %r8,%r8 je .L_after_reduction_BeuuFnmEliqBmCs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BeuuFnmEliqBmCs: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_9_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_ybEEnfpGmbdDyaC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_ybEEnfpGmbdDyaC 
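// NOTE: the _overflow_ labels appear to handle the case where the
// least-significant counter byte (tracked in %r15b) would wrap within the
// current batch: the fast path vpaddd's a byte-reflected increment that is
// only valid while no carry leaves the low byte, so the slow path
// byte-reflects the counters with vpshufb, adds ddq_add_1234/ddq_add_4444
// in little-endian form, and reflects them back.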
.L_16_blocks_overflow_ybEEnfpGmbdDyaC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_ybEEnfpGmbdDyaC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bDrrnAatcuCrjCa subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 
.byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bDrrnAatcuCrjCa .L_small_initial_partial_block_bDrrnAatcuCrjCa: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bDrrnAatcuCrjCa: orq %r8,%r8 je .L_after_reduction_bDrrnAatcuCrjCa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bDrrnAatcuCrjCa: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_10_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_opfbCaznAiAepnv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_opfbCaznAiAepnv .L_16_blocks_overflow_opfbCaznAiAepnv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_opfbCaznAiAepnv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 
98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CsmvpucAbBEBcvl subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 
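//
// What follows is the GHASH finishing sequence for this tail path, as far
// as this generated code can be read: the wide XOR accumulators are first
// folded 512 -> 256 -> 128 bits (vextracti64x4 / vextracti32x4 plus
// vpxorq), and the resulting 256-bit carry-less product is then reduced
// to 128 bits modulo the GCM polynomial by a two-phase reduction built
// from multiplies against the precomputed POLY2 constant. The
// .byte 98,243,117,8,68,... groups around the POLY2 load are those
// EVEX-encoded vpclmulqdq instructions in raw-byte form.
//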
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CsmvpucAbBEBcvl .L_small_initial_partial_block_CsmvpucAbBEBcvl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CsmvpucAbBEBcvl: orq %r8,%r8 je .L_after_reduction_CsmvpucAbBEBcvl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CsmvpucAbBEBcvl: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_11_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_qxFolltldGnscDg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_qxFolltldGnscDg .L_16_blocks_overflow_qxFolltldGnscDg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_qxFolltldGnscDg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 
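//
// Note on the raw .byte runs in this and the analogous tail paths: 0x62
// (decimal 98) is the EVEX prefix, and opcode bytes 0x44 (68), 0xDC (220)
// and 0xDD (221) decode to vpclmulqdq, vaesenc and vaesenclast. Emitting
// them as bytes lets the file assemble with toolchains that support
// AVX-512 but predate the VAES/VPCLMULQDQ mnemonics (cf. the
// MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX guard). The vbroadcastf64x2
// off(%rdi) loads in between appear to splat successive AES round keys
// across all 128-bit lanes for the interleaved counter-mode rounds.
//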
.byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AaGweewAhEribny subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AaGweewAhEribny .L_small_initial_partial_block_AaGweewAhEribny: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AaGweewAhEribny: orq %r8,%r8 je .L_after_reduction_AaGweewAhEribny vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AaGweewAhEribny: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_12_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_nvmdGffBdmtukpe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_nvmdGffBdmtukpe .L_16_blocks_overflow_nvmdGffBdmtukpe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_nvmdGffBdmtukpe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FoabkbEhqjtqagB subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FoabkbEhqjtqagB .L_small_initial_partial_block_FoabkbEhqjtqagB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq 
%zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FoabkbEhqjtqagB: orq %r8,%r8 je .L_after_reduction_FoabkbEhqjtqagB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FoabkbEhqjtqagB: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_13_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_zGEqEwwbyegFygC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_zGEqEwwbyegFygC .L_16_blocks_overflow_zGEqEwwbyegFygC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_zGEqEwwbyegFygC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 
192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gjvieAerDfDGsxy subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gjvieAerDfDGsxy .L_small_initial_partial_block_gjvieAerDfDGsxy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 
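//
// In the vpclmulqdq groups below, the immediate bytes 17, 0, 1 and 16
// (0x11, 0x00, 0x01, 0x10) select the four quadrant products of each
// 128x128-bit carry-less multiply: high*high, low*low and the two cross
// terms. As far as the dataflow here can be read, each group multiplies
// four byte-reflected ciphertext blocks by a vector of hash-key powers
// fetched from the precomputed table (the vmovdqu64 off(%r10) loads),
// accumulating high, low and middle partial products that the
// fold-and-reduce sequence afterwards combines into the running GHASH
// state in %xmm14.
//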
.byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gjvieAerDfDGsxy: orq %r8,%r8 je .L_after_reduction_gjvieAerDfDGsxy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gjvieAerDfDGsxy: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_14_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_hGfdBnfArvqgnDo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_hGfdBnfArvqgnDo .L_16_blocks_overflow_hGfdBnfArvqgnDo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_hGfdBnfArvqgnDo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_faDbEijoauEqsyG subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_faDbEijoauEqsyG .L_small_initial_partial_block_faDbEijoauEqsyG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_faDbEijoauEqsyG: orq %r8,%r8 je .L_after_reduction_faDbEijoauEqsyG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_faDbEijoauEqsyG: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_15_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_AhbxhfFAjAuyeFk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_AhbxhfFAjAuyeFk .L_16_blocks_overflow_AhbxhfFAjAuyeFk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_AhbxhfFAjAuyeFk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 
64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sxxFbklDpjCfEvm subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sxxFbklDpjCfEvm .L_small_initial_partial_block_sxxFbklDpjCfEvm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sxxFbklDpjCfEvm: orq %r8,%r8 je .L_after_reduction_sxxFbklDpjCfEvm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sxxFbklDpjCfEvm: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_16_BsvdFlCzDbBougk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_njybzcioxuyaaaD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_njybzcioxuyaaaD .L_16_blocks_overflow_njybzcioxuyaaaD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_njybzcioxuyaaaD: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 
128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_bCffyflcoaBxCzy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq 
$0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bCffyflcoaBxCzy: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bCffyflcoaBxCzy: jmp .L_last_blocks_done_BsvdFlCzDbBougk .L_last_num_blocks_is_0_BsvdFlCzDbBougk: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_BsvdFlCzDbBougk: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_jzxBnczDBxGvzop .L_encrypt_32_blocks_jzxBnczDBxGvzop: cmpb $240,%r15b jae .L_16_blocks_overflow_wafuliacDuosCms vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wafuliacDuosCms .L_16_blocks_overflow_wafuliacDuosCms: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wafuliacDuosCms: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 
768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_kAejpmvyzczzucF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp 
.L_16_blocks_ok_kAejpmvyzczzucF .L_16_blocks_overflow_kAejpmvyzczzucF: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_kAejpmvyzczzucF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 
%zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%rax movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_vkvGtsckpeodFyd cmpl $8,%r10d je .L_last_num_blocks_is_8_vkvGtsckpeodFyd jb .L_last_num_blocks_is_7_1_vkvGtsckpeodFyd cmpl $12,%r10d je .L_last_num_blocks_is_12_vkvGtsckpeodFyd jb .L_last_num_blocks_is_11_9_vkvGtsckpeodFyd cmpl $15,%r10d je .L_last_num_blocks_is_15_vkvGtsckpeodFyd ja .L_last_num_blocks_is_16_vkvGtsckpeodFyd cmpl $14,%r10d je .L_last_num_blocks_is_14_vkvGtsckpeodFyd jmp .L_last_num_blocks_is_13_vkvGtsckpeodFyd .L_last_num_blocks_is_11_9_vkvGtsckpeodFyd: cmpl $10,%r10d je .L_last_num_blocks_is_10_vkvGtsckpeodFyd ja .L_last_num_blocks_is_11_vkvGtsckpeodFyd jmp .L_last_num_blocks_is_9_vkvGtsckpeodFyd .L_last_num_blocks_is_7_1_vkvGtsckpeodFyd: cmpl $4,%r10d je .L_last_num_blocks_is_4_vkvGtsckpeodFyd jb .L_last_num_blocks_is_3_1_vkvGtsckpeodFyd cmpl $6,%r10d ja .L_last_num_blocks_is_7_vkvGtsckpeodFyd je .L_last_num_blocks_is_6_vkvGtsckpeodFyd jmp .L_last_num_blocks_is_5_vkvGtsckpeodFyd .L_last_num_blocks_is_3_1_vkvGtsckpeodFyd: cmpl $2,%r10d ja .L_last_num_blocks_is_3_vkvGtsckpeodFyd je .L_last_num_blocks_is_2_vkvGtsckpeodFyd .L_last_num_blocks_is_1_vkvGtsckpeodFyd: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_BvAqyjatyidEnnt vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_BvAqyjatyidEnnt .L_16_blocks_overflow_BvAqyjatyidEnnt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_BvAqyjatyidEnnt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 
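//
// Single-block tail (.L_last_num_blocks_is_1_*): one counter block is
// encrypted while, evidently, the GHASH over the preceding 16 blocks
// (saved on the stack) proceeds in parallel. The kmovq above loaded a
// byte mask into %k1 from byte64_len_to_mask_table, so the final,
// possibly partial, block is read and written with masked vmovdqu8; the
// {%k1}{z} forms zero the bytes beyond the message length so padding
// never enters the GHASH input.
//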
.L_last_num_blocks_is_1_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_BvAqyjatyidEnnt
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_BvAqyjatyidEnnt
.L_16_blocks_overflow_BvAqyjatyidEnnt:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_BvAqyjatyidEnnt:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %xmm29,%xmm0,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_gBeshkmzGvkmrAi
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_gBeshkmzGvkmrAi
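/* The path above consumed only whole 16-byte blocks, so the partial-byte
   count at (%rdx) is cleared. The path below appears to handle a trailing
   partial block: the remaining byte count is written to (%rdx) and the last
   block's hash input is stashed at 16(%rsi), deferring its contribution to a
   later call. */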
.L_small_initial_partial_block_gBeshkmzGvkmrAi:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm0
.byte 98,147,125,8,68,217,1
vpslldq $8,%xmm3,%xmm3
vpxorq %xmm3,%xmm25,%xmm3
.byte 98,243,125,8,68,227,0
vpsrldq $4,%xmm4,%xmm4
.byte 98,115,125,8,68,243,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm4,%xmm14
vpxorq %xmm7,%xmm14,%xmm14
jmp .L_after_reduction_gBeshkmzGvkmrAi
.L_small_initial_compute_done_gBeshkmzGvkmrAi:
.L_after_reduction_gBeshkmzGvkmrAi:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_2_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $254,%r15d
jae .L_16_blocks_overflow_nbawutokAutAqum
vpaddd %ymm28,%ymm2,%ymm0
jmp .L_16_blocks_ok_nbawutokAutAqum
.L_16_blocks_overflow_nbawutokAutAqum:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %ymm29,%ymm0,%ymm0
.L_16_blocks_ok_nbawutokAutAqum:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %ymm30,%ymm0,%ymm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,40,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,40,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,40,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,40,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte 98,146,125,40,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,40,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,40,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,40,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,40,220,199
.byte 98,146,125,40,221,198
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %ymm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %ymm29,%ymm0,%ymm17
vextracti32x4 $1,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (2 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_BlGfnlBkldmmFcw
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_BlGfnlBkldmmFcw
.L_small_initial_partial_block_BlGfnlBkldmmFcw:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_BlGfnlBkldmmFcw:
orq %r8,%r8
je .L_after_reduction_BlGfnlBkldmmFcw
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_BlGfnlBkldmmFcw:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_3_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $253,%r15d
jae .L_16_blocks_overflow_CwkxGelBrtqaaxv
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_CwkxGelBrtqaaxv
.L_16_blocks_overflow_CwkxGelBrtqaaxv:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_CwkxGelBrtqaaxv:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $2,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vextracti32x4 $2,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (3 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_EEhEwlabesmvDev
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_EEhEwlabesmvDev
.L_small_initial_partial_block_EEhEwlabesmvDev:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_EEhEwlabesmvDev:
orq %r8,%r8
je .L_after_reduction_EEhEwlabesmvDev
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_EEhEwlabesmvDev:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_4_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $252,%r15d
jae .L_16_blocks_overflow_gFpynBlybCeGalG
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_gFpynBlybCeGalG
.L_16_blocks_overflow_gFpynBlybCeGalG:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_gFpynBlybCeGalG:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $3,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vextracti32x4 $3,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (4 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_gFxpzjaswtGGooa
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_gFxpzjaswtGGooa
.L_small_initial_partial_block_gFxpzjaswtGGooa:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_gFxpzjaswtGGooa:
orq %r8,%r8
je .L_after_reduction_gFxpzjaswtGGooa
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_gFxpzjaswtGGooa:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
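/* Each tail handler first tests whether the low byte of the counter would
   wrap while producing its N blocks (cmpl $(256 - N),%r15d). If so, the
   _overflow_ path appears to byte-swap the counter block, redo the increments
   so the carry propagates across the full big-endian field, and swap back. */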
.L_last_num_blocks_is_5_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $251,%r15d
jae .L_16_blocks_overflow_xwErcCwicbEwFqC
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %xmm27,%xmm0,%xmm3
jmp .L_16_blocks_ok_xwErcCwicbEwFqC
.L_16_blocks_overflow_xwErcCwicbEwFqC:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %xmm29,%xmm3,%xmm3
.L_16_blocks_ok_xwErcCwicbEwFqC:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %xmm30,%xmm3,%xmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,8,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %xmm19,%xmm3,%xmm3
vextracti32x4 $0,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %xmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %xmm29,%xmm3,%xmm19
vextracti32x4 $0,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (5 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_GaEkADDkkdyyuqC
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_GaEkADDkkdyyuqC
.L_small_initial_partial_block_GaEkADDkkdyyuqC:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_GaEkADDkkdyyuqC:
orq %r8,%r8
je .L_after_reduction_GaEkADDkkdyyuqC
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_GaEkADDkkdyyuqC:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_6_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $250,%r15d
jae .L_16_blocks_overflow_baDecrAptncCCuf
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %ymm27,%ymm0,%ymm3
jmp .L_16_blocks_ok_baDecrAptncCCuf
.L_16_blocks_overflow_baDecrAptncCCuf:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %ymm29,%ymm3,%ymm3
.L_16_blocks_ok_baDecrAptncCCuf:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %ymm30,%ymm3,%ymm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,40,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %ymm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %ymm29,%ymm3,%ymm19
vextracti32x4 $1,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (6 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_GuszoBBsEjlucdt
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 160(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 224(%r10),%ymm1
.byte 98,243,101,32,68,225,1
.byte 98,243,101,32,68,233,16
.byte 98,243,101,32,68,193,17
.byte 98,243,101,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_GuszoBBsEjlucdt
.L_small_initial_partial_block_GuszoBBsEjlucdt:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_GuszoBBsEjlucdt:
orq %r8,%r8
je .L_after_reduction_GuszoBBsEjlucdt
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_GuszoBBsEjlucdt:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
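/* The .byte runs throughout this file are hand-assembled EVEX instructions:
   opcode byte 68 (0x44) is vpclmulqdq and 220/221 (0xdc/0xdd) are
   vaesenc/vaesenclast. They are presumably emitted as raw bytes so the
   assembler only needs base AVX-512 support, not the VPCLMULQDQ/VAES
   extensions. */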
.L_last_num_blocks_is_7_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $249,%r15d
jae .L_16_blocks_overflow_iltrljarpeDchus
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
jmp .L_16_blocks_ok_iltrljarpeDchus
.L_16_blocks_overflow_iltrljarpeDchus:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
.L_16_blocks_ok_iltrljarpeDchus:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti32x4 $2,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %zmm29,%zmm3,%zmm19
vextracti32x4 $2,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (7 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_iGfglGojAckhaEr
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 144(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,101,64,68,225,1
.byte 98,243,101,64,68,233,16
.byte 98,243,101,64,68,193,17
.byte 98,243,101,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_iGfglGojAckhaEr
.L_small_initial_partial_block_iGfglGojAckhaEr:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 160(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 224(%r10),%ymm1
.byte 98,243,101,32,68,225,1
.byte 98,243,101,32,68,233,16
.byte 98,243,101,32,68,193,17
.byte 98,243,101,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_iGfglGojAckhaEr:
orq %r8,%r8
je .L_after_reduction_iGfglGojAckhaEr
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_iGfglGojAckhaEr:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_8_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $248,%r15d
jae .L_16_blocks_overflow_eyzjCojxduufqEi
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
jmp .L_16_blocks_ok_eyzjCojxduufqEi
.L_16_blocks_overflow_eyzjCojxduufqEi:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
.L_16_blocks_ok_eyzjCojxduufqEi:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti32x4 $3,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %zmm29,%zmm3,%zmm19
vextracti32x4 $3,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (8 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_hczvcmipanjdewG
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 128(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 192(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_hczvcmipanjdewG
.L_small_initial_partial_block_hczvcmipanjdewG:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 144(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,101,64,68,225,1
.byte 98,243,101,64,68,233,16
.byte 98,243,101,64,68,193,17
.byte 98,243,101,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_hczvcmipanjdewG:
orq %r8,%r8
je .L_after_reduction_hczvcmipanjdewG
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_hczvcmipanjdewG:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_9_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $247,%r15d
jae .L_16_blocks_overflow_bwdCwgCmnErFeDe
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %xmm27,%xmm3,%xmm4
jmp .L_16_blocks_ok_bwdCwgCmnErFeDe
.L_16_blocks_overflow_bwdCwgCmnErFeDe:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %xmm29,%xmm4,%xmm4
.L_16_blocks_ok_bwdCwgCmnErFeDe:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %xmm30,%xmm4,%xmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,8,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %xmm20,%xmm4,%xmm4
vextracti32x4 $0,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %xmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %zmm29,%zmm3,%zmm19
vpshufb %xmm29,%xmm4,%xmm20
vextracti32x4 $0,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (9 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_nczsEBrGqvtCBoe
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 112(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 176(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 240(%r10),%xmm1
.byte 98,243,93,0,68,225,1
.byte 98,243,93,0,68,233,16
.byte 98,243,93,0,68,193,17
.byte 98,243,93,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_nczsEBrGqvtCBoe
.L_small_initial_partial_block_nczsEBrGqvtCBoe:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 128(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 192(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_nczsEBrGqvtCBoe:
orq %r8,%r8
je .L_after_reduction_nczsEBrGqvtCBoe
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_nczsEBrGqvtCBoe:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
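/* Handlers for 9 or more remaining blocks spread the counters over a third
   (and, for 13+, a fourth) ZMM register; the mask-table index is therefore
   the remaining length minus 128 (or 192), so only the final register's load
   and store are masked. */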
.L_last_num_blocks_is_10_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $246,%r15d
jae .L_16_blocks_overflow_eGGpBsfFnpwwbub
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %ymm27,%ymm3,%ymm4
jmp .L_16_blocks_ok_eGGpBsfFnpwwbub
.L_16_blocks_overflow_eGGpBsfFnpwwbub:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %ymm29,%ymm4,%ymm4
.L_16_blocks_ok_eGGpBsfFnpwwbub:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %ymm30,%ymm4,%ymm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,40,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %ymm20,%ymm4,%ymm4
vextracti32x4 $1,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %ymm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %zmm29,%zmm3,%zmm19
vpshufb %ymm29,%ymm4,%ymm20
vextracti32x4 $1,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (10 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_EcwCefEtlqcfEms
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 96(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 160(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 224(%r10),%ymm1
.byte 98,243,93,32,68,225,1
.byte 98,243,93,32,68,233,16
.byte 98,243,93,32,68,193,17
.byte 98,243,93,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_EcwCefEtlqcfEms
.L_small_initial_partial_block_EcwCefEtlqcfEms:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 112(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 176(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 240(%r10),%xmm1
.byte 98,243,93,0,68,225,1
.byte 98,243,93,0,68,233,16
.byte 98,243,93,0,68,193,17
.byte 98,243,93,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_EcwCefEtlqcfEms:
orq %r8,%r8
je .L_after_reduction_EcwCefEtlqcfEms
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_EcwCefEtlqcfEms:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
.L_last_num_blocks_is_11_vkvGtsckpeodFyd:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $245,%r15d
jae .L_16_blocks_overflow_eddhoEuAgjbBjFF
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
jmp .L_16_blocks_ok_eddhoEuAgjbBjFF
.L_16_blocks_overflow_eddhoEuAgjbBjFF:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
.L_16_blocks_ok_eddhoEuAgjbBjFF:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vextracti32x4 $2,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %zmm29,%zmm3,%zmm19
vpshufb %zmm29,%zmm4,%zmm20
vextracti32x4 $2,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (11 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_wytgrCdaysqdDEF
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 80(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 144(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,93,64,68,225,1
.byte 98,243,93,64,68,233,16
.byte 98,243,93,64,68,193,17
.byte 98,243,93,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_wytgrCdaysqdDEF
.L_small_initial_partial_block_wytgrCdaysqdDEF:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 96(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 160(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 224(%r10),%ymm1
.byte 98,243,93,32,68,225,1
.byte 98,243,93,32,68,233,16
.byte 98,243,93,32,68,193,17
.byte 98,243,93,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_wytgrCdaysqdDEF:
orq %r8,%r8
je .L_after_reduction_wytgrCdaysqdDEF
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_wytgrCdaysqdDEF:
jmp .L_last_blocks_done_vkvGtsckpeodFyd
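/* GHASH key powers are taken from the precomputed table at 80(%rsi): with N
   blocks left, the loads appear to start at byte offset 240 - 16*(N - 1), so
   the last block is always multiplied by the lowest power, stored at offset
   240. */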
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pzClwApspseFxiy subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pzClwApspseFxiy .L_small_initial_partial_block_pzClwApspseFxiy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pzClwApspseFxiy: orq %r8,%r8 je .L_after_reduction_pzClwApspseFxiy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pzClwApspseFxiy: jmp .L_last_blocks_done_vkvGtsckpeodFyd .L_last_num_blocks_is_13_vkvGtsckpeodFyd: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_eqddxBoxqiwCsny vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_eqddxBoxqiwCsny .L_16_blocks_overflow_eqddxBoxqiwCsny: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_eqddxBoxqiwCsny: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 
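# AES round-key streaming: vbroadcastf64x2 N(%rdi) replicates round key N/16
# across all 128-bit lanes, and each following .byte group is (by its 0x62
# EVEX prefix and 0xDC opcode) a vaesenc that advances every counter block by
# one round, alternating %zmm30/%zmm31 so the next key can load while the
# current round executes.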
.byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jtCktBigdCvArrs subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 
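# Horizontal GHASH reduction (next span): the middle Karatsuba terms in %zmm4
# are split with vpsrldq/vpslldq into the high and low products, the four
# 128-bit lanes are collapsed with vextracti64x4/vextracti32x4 + vpxorq, and
# the resulting 256-bit value is reduced modulo the GHASH polynomial via two
# vpclmulqdq-by-POLY2 steps. vpternlogq with immediate 0x96 computes a
# three-way XOR (A ^ B ^ C) in a single instruction.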
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jtCktBigdCvArrs .L_small_initial_partial_block_jtCktBigdCvArrs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jtCktBigdCvArrs: orq %r8,%r8 je .L_after_reduction_jtCktBigdCvArrs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jtCktBigdCvArrs: jmp .L_last_blocks_done_vkvGtsckpeodFyd .L_last_num_blocks_is_14_vkvGtsckpeodFyd: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_DAGxccpeauyqpCa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_DAGxccpeauyqpCa .L_16_blocks_overflow_DAGxccpeauyqpCa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_DAGxccpeauyqpCa: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 
48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eekywuGEAhgthae subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 
98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eekywuGEAhgthae .L_small_initial_partial_block_eekywuGEAhgthae: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eekywuGEAhgthae: orq %r8,%r8 je .L_after_reduction_eekywuGEAhgthae vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eekywuGEAhgthae: jmp .L_last_blocks_done_vkvGtsckpeodFyd .L_last_num_blocks_is_15_vkvGtsckpeodFyd: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_xrzdkvEbdpatlsn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_xrzdkvEbdpatlsn .L_16_blocks_overflow_xrzdkvEbdpatlsn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_xrzdkvEbdpatlsn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 
vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nvxEscrdCznvhGj subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 
98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nvxEscrdCznvhGj .L_small_initial_partial_block_nvxEscrdCznvhGj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nvxEscrdCznvhGj: orq %r8,%r8 je .L_after_reduction_nvxEscrdCznvhGj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nvxEscrdCznvhGj: jmp .L_last_blocks_done_vkvGtsckpeodFyd .L_last_num_blocks_is_16_vkvGtsckpeodFyd: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_nhkzxmwsyGuskoi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_nhkzxmwsyGuskoi .L_16_blocks_overflow_nhkzxmwsyGuskoi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 
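# Counter-overflow path: the cmpl $24x,%r15d / jae guard above fires when
# adding the block count would wrap the low byte of the big-endian counter
# (tracked in %r15b). Here the counters are byte-reversed with vpshufb so
# that vpaddd carries across the full 32-bit counter field, then reversed
# back into big-endian form before use.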
vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_nhkzxmwsyGuskoi: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb 
%zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_ECtspjaqpoxwhnx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ECtspjaqpoxwhnx: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ECtspjaqpoxwhnx: jmp .L_last_blocks_done_vkvGtsckpeodFyd .L_last_num_blocks_is_0_vkvGtsckpeodFyd: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_vkvGtsckpeodFyd: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_jzxBnczDBxGvzop 
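# Steady-state path: encrypt a full 16-block (256-byte) chunk. The AES-CTR
# rounds on %zmm0/%zmm3/%zmm4/%zmm5 are interleaved ("stitched") with GHASH
# partial products over the previous chunk, hiding the vpclmulqdq latency
# under the AES round chain. The .byte sequences are EVEX-encoded
# instructions emitted as raw bytes, presumably so the file still assembles
# on toolchains without VAES/VPCLMULQDQ support: opcode 0xDC = vaesenc,
# 0xDD = vaesenclast, 0x44 = vpclmulqdq.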
.L_encrypt_16_blocks_jzxBnczDBxGvzop: cmpb $240,%r15b jae .L_16_blocks_overflow_kkhtsxadreytpgc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_kkhtsxadreytpgc .L_16_blocks_overflow_kkhtsxadreytpgc: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_kkhtsxadreytpgc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%rax movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_CGhwyqzCxCrDAod cmpl $8,%r10d je .L_last_num_blocks_is_8_CGhwyqzCxCrDAod jb .L_last_num_blocks_is_7_1_CGhwyqzCxCrDAod cmpl $12,%r10d je .L_last_num_blocks_is_12_CGhwyqzCxCrDAod jb .L_last_num_blocks_is_11_9_CGhwyqzCxCrDAod cmpl $15,%r10d je .L_last_num_blocks_is_15_CGhwyqzCxCrDAod ja .L_last_num_blocks_is_16_CGhwyqzCxCrDAod cmpl $14,%r10d je .L_last_num_blocks_is_14_CGhwyqzCxCrDAod jmp .L_last_num_blocks_is_13_CGhwyqzCxCrDAod .L_last_num_blocks_is_11_9_CGhwyqzCxCrDAod: cmpl $10,%r10d je .L_last_num_blocks_is_10_CGhwyqzCxCrDAod ja .L_last_num_blocks_is_11_CGhwyqzCxCrDAod jmp .L_last_num_blocks_is_9_CGhwyqzCxCrDAod .L_last_num_blocks_is_7_1_CGhwyqzCxCrDAod: cmpl $4,%r10d je .L_last_num_blocks_is_4_CGhwyqzCxCrDAod jb .L_last_num_blocks_is_3_1_CGhwyqzCxCrDAod cmpl $6,%r10d ja .L_last_num_blocks_is_7_CGhwyqzCxCrDAod je .L_last_num_blocks_is_6_CGhwyqzCxCrDAod jmp .L_last_num_blocks_is_5_CGhwyqzCxCrDAod .L_last_num_blocks_is_3_1_CGhwyqzCxCrDAod: cmpl $2,%r10d ja .L_last_num_blocks_is_3_CGhwyqzCxCrDAod je .L_last_num_blocks_is_2_CGhwyqzCxCrDAod .L_last_num_blocks_is_1_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_ycAFtgAvrzFpmud vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_ycAFtgAvrzFpmud .L_16_blocks_overflow_ycAFtgAvrzFpmud: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_ycAFtgAvrzFpmud: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,8,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_CbqAGqoFBCoBcnn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CbqAGqoFBCoBcnn .L_small_initial_partial_block_CbqAGqoFBCoBcnn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_CbqAGqoFBCoBcnn .L_small_initial_compute_done_CbqAGqoFBCoBcnn: .L_after_reduction_CbqAGqoFBCoBcnn: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_2_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_unaFqvbBnCelmgG vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_unaFqvbBnCelmgG .L_16_blocks_overflow_unaFqvbBnCelmgG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_unaFqvbBnCelmgG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,40,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gzvpemiEleCjEbC subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gzvpemiEleCjEbC .L_small_initial_partial_block_gzvpemiEleCjEbC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gzvpemiEleCjEbC: orq %r8,%r8 je .L_after_reduction_gzvpemiEleCjEbC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gzvpemiEleCjEbC: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_3_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_FzufylrxyerzBEy vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_FzufylrxyerzBEy .L_16_blocks_overflow_FzufylrxyerzBEy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_FzufylrxyerzBEy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 
vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gqwjyzltkrfhGvo subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gqwjyzltkrfhGvo .L_small_initial_partial_block_gqwjyzltkrfhGvo: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gqwjyzltkrfhGvo: orq %r8,%r8 je .L_after_reduction_gqwjyzltkrfhGvo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gqwjyzltkrfhGvo: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_4_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_FtupvahihsnvuAd vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_FtupvahihsnvuAd .L_16_blocks_overflow_FtupvahihsnvuAd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_FtupvahihsnvuAd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 
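# Tail after the 16-block path: 1280..1472(%rsp) hold the byte-reflected
# ciphertext blocks saved by the steady-state path above, and 512..704(%rsp)
# appear to hold the corresponding powers of H; their products fold into the
# running hash while the final partial chunk is encrypted.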
vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wBowoFhurirchGq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wBowoFhurirchGq .L_small_initial_partial_block_wBowoFhurirchGq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wBowoFhurirchGq: orq %r8,%r8 je .L_after_reduction_wBowoFhurirchGq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wBowoFhurirchGq: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_5_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_uBhGhomDazsjBak vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_uBhGhomDazsjBak .L_16_blocks_overflow_uBhGhomDazsjBak: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_uBhGhomDazsjBak: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 
$1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eFbGprqpsBhvBkh subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eFbGprqpsBhvBkh .L_small_initial_partial_block_eFbGprqpsBhvBkh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eFbGprqpsBhvBkh: orq %r8,%r8 je .L_after_reduction_eFbGprqpsBhvBkh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eFbGprqpsBhvBkh: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_6_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_mBfhrGpovoncBkc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_mBfhrGpovoncBkc .L_16_blocks_overflow_mBfhrGpovoncBkc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_mBfhrGpovoncBkc: 
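// The .byte runs hand-encode EVEX instructions (leading byte 98 == 0x62 is
// the EVEX prefix), presumably so the generated file assembles on toolchains
// without AVX-512 mnemonic support: opcode byte 68 is vpclmulqdq, 220 is
// vaesenc, 221 is vaesenclast. The six-block tail arm below encrypts its
// counter blocks while completing the GHASH of the preceding sixteen blocks
// in parallel.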
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qFilbDGEygcyzzw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 
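// Combine the low, high, and middle carry-less products accumulated above
// into %zmm0/%zmm3 ahead of the POLY2 polynomial reduction that follows.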
vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qFilbDGEygcyzzw .L_small_initial_partial_block_qFilbDGEygcyzzw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qFilbDGEygcyzzw: orq %r8,%r8 je .L_after_reduction_qFilbDGEygcyzzw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qFilbDGEygcyzzw: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_7_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_FvpewBABrfyByvd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_FvpewBABrfyByvd .L_16_blocks_overflow_FvpewBABrfyByvd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_FvpewBABrfyByvd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vrmegiBFdzfFmfq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vrmegiBFdzfFmfq .L_small_initial_partial_block_vrmegiBFdzfFmfq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 
98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vrmegiBFdzfFmfq: orq %r8,%r8 je .L_after_reduction_vrmegiBFdzfFmfq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vrmegiBFdzfFmfq: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_8_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_FsoptjzAkrqyAAr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_FsoptjzAkrqyAAr .L_16_blocks_overflow_FsoptjzAkrqyAAr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_FsoptjzAkrqyAAr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq 
$8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vkFiBjCFtrykuwD subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vkFiBjCFtrykuwD .L_small_initial_partial_block_vkFiBjCFtrykuwD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
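// At this point %xmm14 holds the reduced GHASH state for the eight-block
// arm; the partial-block contribution in %xmm7 is folded in below only when
// trailing bytes remain (%r8 != 0).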
.L_small_initial_compute_done_vkFiBjCFtrykuwD: orq %r8,%r8 je .L_after_reduction_vkFiBjCFtrykuwD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vkFiBjCFtrykuwD: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_9_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_iABBxfvotBEkECx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_iABBxfvotBEkECx .L_16_blocks_overflow_iABBxfvotBEkECx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_iABBxfvotBEkECx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq 
$8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ypbbgpxgCctCtxy subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ypbbgpxgCctCtxy .L_small_initial_partial_block_ypbbgpxgCctCtxy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ypbbgpxgCctCtxy: orq %r8,%r8 je .L_after_reduction_ypbbgpxgCctCtxy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ypbbgpxgCctCtxy: jmp 
.L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_10_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_jEngtqCkuniGdjp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_jEngtqCkuniGdjp .L_16_blocks_overflow_jEngtqCkuniGdjp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_jEngtqCkuniGdjp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 
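// The round key broadcast from 192(%rdi) appears to feed the vaesenclast
// encodings (.byte ... 221 ...) a few instructions below, which finish
// encrypting the counter blocks for this arm.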
.byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nffhkznowjoDiCf subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nffhkznowjoDiCf .L_small_initial_partial_block_nffhkznowjoDiCf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nffhkznowjoDiCf: orq %r8,%r8 je .L_after_reduction_nffhkznowjoDiCf 
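// %xmm7 holds the byte-reflected final (possibly partial) block; it is
// XORed into the GHASH accumulator only when such a block exists.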
vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nffhkznowjoDiCf: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_11_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_beoirgaAxslixji vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_beoirgaAxslixji .L_16_blocks_overflow_beoirgaAxslixji: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_beoirgaAxslixji: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cqzlemDcyGkhDnC subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cqzlemDcyGkhDnC .L_small_initial_partial_block_cqzlemDcyGkhDnC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cqzlemDcyGkhDnC: orq %r8,%r8 je .L_after_reduction_cqzlemDcyGkhDnC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cqzlemDcyGkhDnC: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_12_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_sxrCycfBickEpCs vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_sxrCycfBickEpCs .L_16_blocks_overflow_sxrCycfBickEpCs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_sxrCycfBickEpCs: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 
98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lzBzlrbzBeACuhk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lzBzlrbzBeACuhk .L_small_initial_partial_block_lzBzlrbzBeACuhk: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lzBzlrbzBeACuhk: orq %r8,%r8 je .L_after_reduction_lzBzlrbzBeACuhk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lzBzlrbzBeACuhk: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_13_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_sesGGmqiCkypotq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_sesGGmqiCkypotq .L_16_blocks_overflow_sesGGmqiCkypotq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_sesGGmqiCkypotq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 
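// The vextracti64x4/vextracti32x4 ladder below folds the 512-bit GHASH
// accumulators down to a single 128-bit value before the POLY2 reduction.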
vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qihphhEmthsffzk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qihphhEmthsffzk .L_small_initial_partial_block_qihphhEmthsffzk: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 
98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qihphhEmthsffzk: orq %r8,%r8 je .L_after_reduction_qihphhEmthsffzk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qihphhEmthsffzk: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_14_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_jqifyxAoeoxkDuE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_jqifyxAoeoxkDuE .L_16_blocks_overflow_jqifyxAoeoxkDuE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_jqifyxAoeoxkDuE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 
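// This 14-block tail mirrors the 13-block path above; only the widths
// change: the fourth data vector holds two blocks and runs at %ymm
// width, and the Htable powers are read starting 16 bytes earlier
// (32(%r10) rather than 48(%r10)).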
.byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FkuwuuqBpnEvzkd subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FkuwuuqBpnEvzkd .L_small_initial_partial_block_FkuwuuqBpnEvzkd: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FkuwuuqBpnEvzkd: orq %r8,%r8 je .L_after_reduction_FkuwuuqBpnEvzkd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FkuwuuqBpnEvzkd: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_15_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_CBqhusrmEugbwks vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_CBqhusrmEugbwks .L_16_blocks_overflow_CBqhusrmEugbwks: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_CBqhusrmEugbwks: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qEmtvwDozjnABmp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 
98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qEmtvwDozjnABmp .L_small_initial_partial_block_qEmtvwDozjnABmp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qEmtvwDozjnABmp: orq %r8,%r8 je .L_after_reduction_qEmtvwDozjnABmp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qEmtvwDozjnABmp: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_16_CGhwyqzCxCrDAod: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_hDfCleGEdmpzBiw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hDfCleGEdmpzBiw .L_16_blocks_overflow_hDfCleGEdmpzBiw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd 
%zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hDfCleGEdmpzBiw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 
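// End of the GHASH reduction for the 16-block tail: the remaining
// vpclmulqdq folds against POLY2 (held in %xmm16 here) and the
// vpternlogq below collapse the accumulated product into %xmm14 while
// vaesenclast (opcode 0xDD) finishes the last cipher blocks.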
vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_pybhdxzahdqcprl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pybhdxzahdqcprl: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pybhdxzahdqcprl: jmp .L_last_blocks_done_CGhwyqzCxCrDAod .L_last_num_blocks_is_0_CGhwyqzCxCrDAod: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 
$1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm4
.byte 98,147,93,8,68,193,1
vpslldq $8,%xmm0,%xmm0
vpxorq %xmm0,%xmm25,%xmm0
.byte 98,243,93,8,68,216,0
vpsrldq $4,%xmm3,%xmm3
.byte 98,115,93,8,68,240,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm3,%xmm14
.L_last_blocks_done_CGhwyqzCxCrDAod:
vpshufb %xmm29,%xmm2,%xmm2
jmp .L_ghash_done_jzxBnczDBxGvzop
.L_message_below_32_blocks_jzxBnczDBxGvzop:
subq $256,%r8
addq $256,%rax
movl %r8d,%r10d
leaq 80(%rsi),%r12
testq %r14,%r14
jnz .L_skip_hkeys_precomputation_dzmCrsBiciGnliE
vmovdqu64 640(%rsp),%zmm3
vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3
vmovdqu64 576(%rsp),%zmm4
vmovdqu64 512(%rsp),%zmm5
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,448(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,384(%rsp)
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,320(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,256(%rsp)
.L_skip_hkeys_precomputation_dzmCrsBiciGnliE:
movq $1,%r14
andl $~15,%r10d
movl $512,%ebx
subl %r10d,%ebx
movl %r8d,%r10d
addl $15,%r10d
shrl $4,%r10d
je .L_last_num_blocks_is_0_aFwwehusEvmmDke
cmpl $8,%r10d
je .L_last_num_blocks_is_8_aFwwehusEvmmDke
jb .L_last_num_blocks_is_7_1_aFwwehusEvmmDke
cmpl $12,%r10d
je .L_last_num_blocks_is_12_aFwwehusEvmmDke
jb .L_last_num_blocks_is_11_9_aFwwehusEvmmDke
cmpl $15,%r10d
je .L_last_num_blocks_is_15_aFwwehusEvmmDke
ja .L_last_num_blocks_is_16_aFwwehusEvmmDke
cmpl $14,%r10d
je .L_last_num_blocks_is_14_aFwwehusEvmmDke
jmp .L_last_num_blocks_is_13_aFwwehusEvmmDke
.L_last_num_blocks_is_11_9_aFwwehusEvmmDke:
cmpl $10,%r10d
je .L_last_num_blocks_is_10_aFwwehusEvmmDke
ja .L_last_num_blocks_is_11_aFwwehusEvmmDke
jmp .L_last_num_blocks_is_9_aFwwehusEvmmDke
.L_last_num_blocks_is_7_1_aFwwehusEvmmDke:
cmpl $4,%r10d
je .L_last_num_blocks_is_4_aFwwehusEvmmDke
jb .L_last_num_blocks_is_3_1_aFwwehusEvmmDke
cmpl $6,%r10d
ja .L_last_num_blocks_is_7_aFwwehusEvmmDke
je .L_last_num_blocks_is_6_aFwwehusEvmmDke
jmp .L_last_num_blocks_is_5_aFwwehusEvmmDke
.L_last_num_blocks_is_3_1_aFwwehusEvmmDke:
cmpl $2,%r10d
ja .L_last_num_blocks_is_3_aFwwehusEvmmDke
je .L_last_num_blocks_is_2_aFwwehusEvmmDke
.L_last_num_blocks_is_1_aFwwehusEvmmDke:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_BsFiEfmuvxGEGuk
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_BsFiEfmuvxGEGuk
.L_16_blocks_overflow_BsFiEfmuvxGEGuk:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_BsFiEfmuvxGEGuk:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %xmm29,%xmm0,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_zEujlpbgqDyCdvt
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zEujlpbgqDyCdvt .L_small_initial_partial_block_zEujlpbgqDyCdvt: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_zEujlpbgqDyCdvt .L_small_initial_compute_done_zEujlpbgqDyCdvt: .L_after_reduction_zEujlpbgqDyCdvt: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_2_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_DrefbggoCuhFosm vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_DrefbggoCuhFosm .L_16_blocks_overflow_DrefbggoCuhFosm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_DrefbggoCuhFosm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq 
%ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rjkFEDDDoeuwufs subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rjkFEDDDoeuwufs .L_small_initial_partial_block_rjkFEDDDoeuwufs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rjkFEDDDoeuwufs: orq %r8,%r8 je .L_after_reduction_rjkFEDDDoeuwufs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rjkFEDDDoeuwufs: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_3_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_oskEeEmCEGeqECv vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_oskEeEmCEGeqECv .L_16_blocks_overflow_oskEeEmCEGeqECv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_oskEeEmCEGeqECv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 
98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sgeerDwthydzyuy subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sgeerDwthydzyuy .L_small_initial_partial_block_sgeerDwthydzyuy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sgeerDwthydzyuy: orq %r8,%r8 je .L_after_reduction_sgeerDwthydzyuy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sgeerDwthydzyuy: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_4_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_aAxBGtfyfEadAkB vpaddd %zmm28,%zmm2,%zmm0 jmp 
.L_16_blocks_ok_aAxBGtfyfEadAkB .L_16_blocks_overflow_aAxBGtfyfEadAkB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_aAxBGtfyfEadAkB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Dqjcrneuragvwkw subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Dqjcrneuragvwkw .L_small_initial_partial_block_Dqjcrneuragvwkw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq 
%zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Dqjcrneuragvwkw: orq %r8,%r8 je .L_after_reduction_Dqjcrneuragvwkw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Dqjcrneuragvwkw: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_5_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_bpEikxmsheidfwq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_bpEikxmsheidfwq .L_16_blocks_overflow_bpEikxmsheidfwq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_bpEikxmsheidfwq: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 
98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AGuqEbsAbinbrDm subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AGuqEbsAbinbrDm .L_small_initial_partial_block_AGuqEbsAbinbrDm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AGuqEbsAbinbrDm: orq %r8,%r8 je .L_after_reduction_AGuqEbsAbinbrDm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AGuqEbsAbinbrDm: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_6_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_otEmDDixbpFEmvy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_otEmDDixbpFEmvy .L_16_blocks_overflow_otEmDDixbpFEmvy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_otEmDDixbpFEmvy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 
98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FAvepDmDsogujha subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FAvepDmDsogujha .L_small_initial_partial_block_FAvepDmDsogujha: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 
98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FAvepDmDsogujha: orq %r8,%r8 je .L_after_reduction_FAvepDmDsogujha vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FAvepDmDsogujha: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_7_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_kEvFawDBkeclidj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_kEvFawDBkeclidj .L_16_blocks_overflow_kEvFawDBkeclidj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_kEvFawDBkeclidj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 
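// The 5-, 6- and 7-block tails share one shape: a full %zmm of four
// blocks plus a masked second vector, with the GHASH key powers taken
// from 176(%r10), 160(%r10) and 144(%r10) respectively so a single
// reduction covers the whole tail.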
vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jyvbjxevpurblup subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jyvbjxevpurblup .L_small_initial_partial_block_jyvbjxevpurblup: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jyvbjxevpurblup: orq %r8,%r8 je .L_after_reduction_jyvbjxevpurblup vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jyvbjxevpurblup: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_8_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_nfBegzmtymkjkuE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_nfBegzmtymkjkuE .L_16_blocks_overflow_nfBegzmtymkjkuE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_nfBegzmtymkjkuE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ArBbnussymieuyl subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ArBbnussymieuyl .L_small_initial_partial_block_ArBbnussymieuyl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ArBbnussymieuyl: orq %r8,%r8 je .L_after_reduction_ArBbnussymieuyl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ArBbnussymieuyl: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_9_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_zjmfGFrkFzfxxez vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_zjmfGFrkFzfxxez .L_16_blocks_overflow_zjmfGFrkFzfxxez: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_zjmfGFrkFzfxxez: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CEuslsjdAFEouni subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CEuslsjdAFEouni .L_small_initial_partial_block_CEuslsjdAFEouni: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 
98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_CEuslsjdAFEouni: orq %r8,%r8 je .L_after_reduction_CEuslsjdAFEouni vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CEuslsjdAFEouni: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_10_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_BvDkzdlGxbqBdwD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_BvDkzdlGxbqBdwD .L_16_blocks_overflow_BvDkzdlGxbqBdwD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_BvDkzdlGxbqBdwD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FEEAGeFDucwexEe subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FEEAGeFDucwexEe .L_small_initial_partial_block_FEEAGeFDucwexEe: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FEEAGeFDucwexEe: orq %r8,%r8 je .L_after_reduction_FEEAGeFDucwexEe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FEEAGeFDucwexEe: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_11_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_wfjezxDvGpDnoFf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_wfjezxDvGpDnoFf .L_16_blocks_overflow_wfjezxDvGpDnoFf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_wfjezxDvGpDnoFf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 
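// 11-block tail, final rounds: the third counter stream (zmm4) runs through
// the same vaesenc chain; vaesenclast, the masked ciphertext XOR, and the
// stores follow.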
.byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qadlBuzdbwfpDef subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qadlBuzdbwfpDef .L_small_initial_partial_block_qadlBuzdbwfpDef: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qadlBuzdbwfpDef: orq %r8,%r8 je .L_after_reduction_qadlBuzdbwfpDef vpxorq %xmm7,%xmm14,%xmm14 
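// The remaining tail handlers (12 to 16 residual blocks) below repeat one
// pattern:
//   1. build a load/store mask from byte64_len_to_mask_table
//   2. derive the counter blocks (vpaddd), taking a byte-swapped slow path
//      when the low 8-bit counter would wrap
//   3. AES-encrypt them while GHASH-ing the previous 16 blocks (vpclmulqdq /
//      vpternlogq, .byte-encoded for older assemblers)
//   4. XOR with the masked input, store, and byte-reflect the output blocks
//   5. multiply by the matching hash-key powers and reduce with POLY2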
.L_after_reduction_qadlBuzdbwfpDef: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_12_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_pbckDbEtDdqavpn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_pbckDbEtDdqavpn .L_16_blocks_overflow_pbckDbEtDdqavpn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_pbckDbEtDdqavpn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 
%zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_osuccbBAbutpqse subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_osuccbBAbutpqse .L_small_initial_partial_block_osuccbBAbutpqse: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_osuccbBAbutpqse: orq %r8,%r8 je .L_after_reduction_osuccbBAbutpqse vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_osuccbBAbutpqse: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_13_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_oCotpBuspdAtjpe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_oCotpBuspdAtjpe .L_16_blocks_overflow_oCotpBuspdAtjpe: vpshufb 
%zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_oCotpBuspdAtjpe: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 
%xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mmuEdDpgoEjulrs subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mmuEdDpgoEjulrs .L_small_initial_partial_block_mmuEdDpgoEjulrs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mmuEdDpgoEjulrs: orq %r8,%r8 je .L_after_reduction_mmuEdDpgoEjulrs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mmuEdDpgoEjulrs: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_14_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae 
.L_16_blocks_overflow_bbvjuqrsjgdyCBn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_bbvjuqrsjgdyCBn .L_16_blocks_overflow_bbvjuqrsjgdyCBn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_bbvjuqrsjgdyCBn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_roGxlxzlgsulhzk subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_roGxlxzlgsulhzk .L_small_initial_partial_block_roGxlxzlgsulhzk: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_roGxlxzlgsulhzk: orq %r8,%r8 je .L_after_reduction_roGxlxzlgsulhzk vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_roGxlxzlgsulhzk: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_15_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_GriwFAotfyoEekC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_GriwFAotfyoEekC .L_16_blocks_overflow_GriwFAotfyoEekC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_GriwFAotfyoEekC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 
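// 15-block tail: round keys 176 and 192, then vaesenclast over all four
// counter streams.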
vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_huBogwgwhfClyls subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_huBogwgwhfClyls .L_small_initial_partial_block_huBogwgwhfClyls: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_huBogwgwhfClyls: orq %r8,%r8 je .L_after_reduction_huBogwgwhfClyls vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_huBogwgwhfClyls: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_16_aFwwehusEvmmDke: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_sjAcjwAAtCgmwjr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_sjAcjwAAtCgmwjr .L_16_blocks_overflow_sjAcjwAAtCgmwjr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_sjAcjwAAtCgmwjr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq 
%zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_ayefrejzGqbkfya: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ayefrejzGqbkfya: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ayefrejzGqbkfya: jmp .L_last_blocks_done_aFwwehusEvmmDke .L_last_num_blocks_is_0_aFwwehusEvmmDke: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 
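// Continuation of .L_last_num_blocks_is_0: no trailing cipher blocks remain, so
// this path only GHASHes the ciphertext still buffered at (%rsp,%rbx) against
// the key powers saved earlier at 768..960(%rsp). The .byte runs are EVEX
// vpclmulqdq forms; zmm24/zmm25/zmm26 accumulate the partial products ahead of
// the fold and POLY2 reduction into the running digest in xmm14 (annotation
// inferred from the instruction stream).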
vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_aFwwehusEvmmDke: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_jzxBnczDBxGvzop .L_message_below_equal_16_blocks_jzxBnczDBxGvzop: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_wjgmgrFcljfrexe jl .L_small_initial_num_blocks_is_7_1_wjgmgrFcljfrexe cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_wjgmgrFcljfrexe jl .L_small_initial_num_blocks_is_11_9_wjgmgrFcljfrexe cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_wjgmgrFcljfrexe cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_wjgmgrFcljfrexe cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_wjgmgrFcljfrexe jmp .L_small_initial_num_blocks_is_13_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_11_9_wjgmgrFcljfrexe: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_wjgmgrFcljfrexe cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_wjgmgrFcljfrexe jmp .L_small_initial_num_blocks_is_9_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_7_1_wjgmgrFcljfrexe: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_wjgmgrFcljfrexe jl .L_small_initial_num_blocks_is_3_1_wjgmgrFcljfrexe cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_wjgmgrFcljfrexe cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_wjgmgrFcljfrexe jmp .L_small_initial_num_blocks_is_5_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_3_1_wjgmgrFcljfrexe: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_wjgmgrFcljfrexe cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_1_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONEa(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%rax,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 
98,210,125,8,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,8,221,199 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_FfndtjjjGEeCFEr subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FfndtjjjGEeCFEr .L_small_initial_partial_block_FfndtjjjGEeCFEr: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_FfndtjjjGEeCFEr .L_small_initial_compute_done_FfndtjjjGEeCFEr: .L_after_reduction_FfndtjjjGEeCFEr: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_2_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%rax,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,40,221,199 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EnhukCdygAFrqou subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EnhukCdygAFrqou .L_small_initial_partial_block_EnhukCdygAFrqou: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EnhukCdygAFrqou: orq %r8,%r8 je .L_after_reduction_EnhukCdygAFrqou vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_EnhukCdygAFrqou: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_3_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_impdlEsbGuAaott subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_impdlEsbGuAaott 
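// Partial-block variant of the 3-block tail (below): store the leftover byte
// count at (%rdx) and the last ciphertext block at 16(%rsi), then hash only the
// full blocks, using key powers one slot later (224(%r10) rather than
// 208(%r10)); the deferred block in xmm13 is folded into xmm14 at
// .L_after_reduction_* only when %r8 is non-zero.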
.L_small_initial_partial_block_impdlEsbGuAaott: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_impdlEsbGuAaott: orq %r8,%r8 je .L_after_reduction_impdlEsbGuAaott vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_impdlEsbGuAaott: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_4_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rBqdjBpBxxfxpoF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rBqdjBpBxxfxpoF .L_small_initial_partial_block_rBqdjBpBxxfxpoF: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 
98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rBqdjBpBxxfxpoF: orq %r8,%r8 je .L_after_reduction_rBqdjBpBxxfxpoF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_rBqdjBpBxxfxpoF: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_5_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,8,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vuaskFEqawsiCsj subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vuaskFEqawsiCsj .L_small_initial_partial_block_vuaskFEqawsiCsj: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vuaskFEqawsiCsj: orq %r8,%r8 je .L_after_reduction_vuaskFEqawsiCsj vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_vuaskFEqawsiCsj: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_6_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,40,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dCffBvEqzkjcfvA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 
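// 6-block tail, all full blocks: the four blocks in zmm6 were multiplied by the
// key powers at 160(%r10) just above; the ymm ops below apparently cover the
// remaining two blocks with the powers at 224(%r10) before the common
// fold-and-reduce sequence.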
vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dCffBvEqzkjcfvA .L_small_initial_partial_block_dCffBvEqzkjcfvA: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dCffBvEqzkjcfvA: orq %r8,%r8 je .L_after_reduction_dCffBvEqzkjcfvA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dCffBvEqzkjcfvA: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_7_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 
98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yEllnEiichfbFDc subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yEllnEiichfbFDc .L_small_initial_partial_block_yEllnEiichfbFDc: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yEllnEiichfbFDc: orq %r8,%r8 je .L_after_reduction_yEllnEiichfbFDc vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_yEllnEiichfbFDc: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_8_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 
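// AES pass for the 8-block case: each vbroadcastf64x2 replicates one round key
// from (%rdi) across a zmm register, and the .byte pairs are EVEX vaesenc
// (opcode 220) on the two counter vectors, ending in vaesenclast (221) with the
// 192(%rdi) key; presumably emitted as raw bytes for old assemblers.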
vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vBdxtBrlzxbaFcc subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vBdxtBrlzxbaFcc .L_small_initial_partial_block_vBdxtBrlzxbaFcc: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 
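// Second phase of the POLY2 reduction for the 8-block full path: the remaining
// vpclmulqdq (.byte) plus the 4-byte shifts collapse the folded product into
// the 128-bit GHASH state in xmm14.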
vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vBdxtBrlzxbaFcc: orq %r8,%r8 je .L_after_reduction_vBdxtBrlzxbaFcc vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_vBdxtBrlzxbaFcc: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_9_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,8,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_scfvxdenebqCdyz subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq 
$8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_scfvxdenebqCdyz .L_small_initial_partial_block_scfvxdenebqCdyz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_scfvxdenebqCdyz: orq %r8,%r8 je .L_after_reduction_scfvxdenebqCdyz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_scfvxdenebqCdyz: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_10_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 
98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,40,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cvcjsgotzqiyevA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cvcjsgotzqiyevA .L_small_initial_partial_block_cvcjsgotzqiyevA: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
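// Epilogue of the 10-block handler follows, then the 11-block handler, which
// repeats the same template: three zmm counter vectors, masked load/store of
// the tail through %k1, GHASH of the byte-reflected ciphertext, and the shared
// reduction (annotation inferred from the surrounding pattern).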
.L_small_initial_compute_done_cvcjsgotzqiyevA: orq %r8,%r8 je .L_after_reduction_cvcjsgotzqiyevA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_cvcjsgotzqiyevA: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_11_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vuCaGGnzBCpphtu subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq 
%zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vuCaGGnzBCpphtu .L_small_initial_partial_block_vuCaGGnzBCpphtu: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vuCaGGnzBCpphtu: orq %r8,%r8 je .L_after_reduction_vuCaGGnzBCpphtu vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_vuCaGGnzBCpphtu: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_12_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 
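// Remaining AES rounds of the 12-block handler: keys at 128..176(%rdi) feed
// .byte-encoded vaesenc, and the 192(%rdi) key feeds the closing vaesenclast.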
vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qhFhudxmstaFEvA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qhFhudxmstaFEvA .L_small_initial_partial_block_qhFhudxmstaFEvA: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
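// Tail of the reduction for the 12-block partial path, then its compute-done
// epilogue; the 13-block handler that follows adds a fourth counter vector
// (zmm5) via ddq_add_8888 and masks the final 64-byte chunk with %k1.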
.byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qhFhudxmstaFEvA: orq %r8,%r8 je .L_after_reduction_qhFhudxmstaFEvA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_qhFhudxmstaFEvA: jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe .L_small_initial_num_blocks_is_13_wjgmgrFcljfrexe: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,8,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GbEgefaoCcDkbpn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 
vmovdqu64 112(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 176(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 240(%r10),%xmm20
.byte 98,179,37,8,68,228,1
.byte 98,179,37,8,68,236,16
.byte 98,179,37,8,68,196,17
.byte 98,179,37,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_GbEgefaoCcDkbpn
.L_small_initial_partial_block_GbEgefaoCcDkbpn:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 64(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 128(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 192(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_GbEgefaoCcDkbpn:
orq %r8,%r8
je .L_after_reduction_GbEgefaoCcDkbpn
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_GbEgefaoCcDkbpn:
jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe
.L_small_initial_num_blocks_is_14_wjgmgrFcljfrexe:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
vpaddd ddq_add_8888(%rip),%zmm3,%zmm5
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $192,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $1,%zmm5,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %ymm29,%ymm5,%ymm5
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10
vmovdqu8 192(%rcx,%rax,1),%ymm11{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vpxorq %ymm15,%ymm5,%ymm5
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,40,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %ymm11,%ymm5,%ymm5
vextracti32x4 $1,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %ymm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm6
vpshufb %zmm29,%zmm3,%zmm7
vpshufb %zmm29,%zmm4,%zmm10
vpshufb %ymm29,%ymm5,%ymm11
vextracti32x4 $1,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (14 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_hCaaAkupwhFdkkk
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 32(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 96(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 160(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 224(%r10),%ymm20
.byte 98,179,37,40,68,228,1
.byte 98,179,37,40,68,236,16
.byte 98,179,37,40,68,196,17
.byte 98,179,37,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_hCaaAkupwhFdkkk
.L_small_initial_partial_block_hCaaAkupwhFdkkk:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 48(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 112(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 176(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 240(%r10),%xmm20
.byte 98,179,37,8,68,228,1
.byte 98,179,37,8,68,236,16
.byte 98,179,37,8,68,196,17
.byte 98,179,37,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_hCaaAkupwhFdkkk:
orq %r8,%r8
je .L_after_reduction_hCaaAkupwhFdkkk
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_hCaaAkupwhFdkkk:
jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe
.L_small_initial_num_blocks_is_15_wjgmgrFcljfrexe:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
vpaddd ddq_add_8888(%rip),%zmm3,%zmm5
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $192,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $2,%zmm5,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10
vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vpxorq %zmm15,%zmm5,%zmm5
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,72,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %zmm11,%zmm5,%zmm5
vextracti32x4 $2,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm6
vpshufb %zmm29,%zmm3,%zmm7
vpshufb %zmm29,%zmm4,%zmm10
vpshufb %zmm29,%zmm5,%zmm11
vextracti32x4 $2,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (15 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_kBjkymsezzduvxc
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 16(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 80(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 144(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,37,72,68,228,1
.byte 98,179,37,72,68,236,16
.byte 98,179,37,72,68,196,17
.byte 98,179,37,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_kBjkymsezzduvxc
.L_small_initial_partial_block_kBjkymsezzduvxc:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 32(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 96(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 160(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 224(%r10),%ymm20
.byte 98,179,37,40,68,228,1
.byte 98,179,37,40,68,236,16
.byte 98,179,37,40,68,196,17
.byte 98,179,37,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_kBjkymsezzduvxc:
orq %r8,%r8
je .L_after_reduction_kBjkymsezzduvxc
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_kBjkymsezzduvxc:
jmp .L_small_initial_blocks_encrypted_wjgmgrFcljfrexe
.L_small_initial_num_blocks_is_16_wjgmgrFcljfrexe:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
vpaddd ddq_add_8888(%rip),%zmm3,%zmm5
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $192,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $3,%zmm5,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10
vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vpxorq %zmm15,%zmm5,%zmm5
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,72,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %zmm11,%zmm5,%zmm5
vextracti32x4 $3,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm6
vpshufb %zmm29,%zmm3,%zmm7
vpshufb %zmm29,%zmm4,%zmm10
vpshufb %zmm29,%zmm5,%zmm11
vextracti32x4 $3,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (16 - 1),%r8
.L_small_initial_partial_block_qxCpdapFxyCuqwj:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 16(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 80(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 144(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,37,72,68,228,1
.byte 98,179,37,72,68,236,16
.byte 98,179,37,72,68,196,17
.byte 98,179,37,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_qxCpdapFxyCuqwj:
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_qxCpdapFxyCuqwj:
.L_small_initial_blocks_encrypted_wjgmgrFcljfrexe:
.L_ghash_done_jzxBnczDBxGvzop:
vmovdqu64 %xmm2,0(%rsi)
.L_enc_dec_done_jzxBnczDBxGvzop:
vpshufb SHUF_MASK(%rip),%xmm14,%xmm14
vmovdqu64 %xmm14,64(%rsi)
.L_enc_dec_abort_jzxBnczDBxGvzop:
jmp .Lexit_gcm_encrypt
.align 32
// AES-256 encrypt path: same structure as the path above, but with 14 AES
// rounds (round keys at 0(%rdi) through 224(%rdi)).
.Laes_gcm_encrypt_256_avx512:
orq %r8,%r8
je .L_enc_dec_abort_ralurfzeatcGxDF
xorq %r14,%r14
vmovdqu64 64(%rsi),%xmm14
vpshufb SHUF_MASK(%rip),%xmm14,%xmm14
movl (%rdx),%eax
orq %rax,%rax
je .L_partial_block_done_yhoCcfnksexDFbx
movl $16,%r10d
leaq byte_len_to_mask_table(%rip),%r12
cmpq %r10,%r8
cmovcq %r8,%r10
kmovw (%r12,%r10,2),%k1
vmovdqu8 (%rcx),%xmm0{%k1}{z}
vmovdqu64 16(%rsi),%xmm3
leaq 80(%rsi),%r10
vmovdqu64 240(%r10),%xmm4
leaq SHIFT_MASK(%rip),%r12
addq %rax,%r12
vmovdqu64 (%r12),%xmm5
vpshufb %xmm5,%xmm3,%xmm3
vpxorq %xmm0,%xmm3,%xmm3
leaq (%r8,%rax,1),%r13
subq $16,%r13
jge .L_no_extra_mask_yhoCcfnksexDFbx
subq %r13,%r12
.L_no_extra_mask_yhoCcfnksexDFbx:
vmovdqu64 16(%r12),%xmm0
vpand %xmm0,%xmm3,%xmm3
vpshufb SHUF_MASK(%rip),%xmm3,%xmm3
vpshufb %xmm5,%xmm3,%xmm3
vpxorq %xmm3,%xmm14,%xmm14
cmpq $0,%r13
jl .L_partial_incomplete_yhoCcfnksexDFbx
.byte 98,243,13,8,68,252,17
.byte 98,115,13,8,68,212,0
.byte 98,115,13,8,68,220,1
.byte 98,115,13,8,68,244,16
vpxorq %xmm11,%xmm14,%xmm14
vpsrldq $8,%xmm14,%xmm11
vpslldq $8,%xmm14,%xmm14
vpxorq %xmm11,%xmm7,%xmm7
vpxorq %xmm10,%xmm14,%xmm14
vmovdqu64 POLY2(%rip),%xmm11
.byte 98,83,37,8,68,214,1
vpslldq $8,%xmm10,%xmm10
vpxorq %xmm10,%xmm14,%xmm14
.byte 98,83,37,8,68,214,0
vpsrldq $4,%xmm10,%xmm10
.byte 98,83,37,8,68,246,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm10,%xmm7,%xmm14
movl $0,(%rdx)
movq %rax,%r12
movq $16,%rax
subq %r12,%rax
jmp .L_enc_dec_done_yhoCcfnksexDFbx
.L_partial_incomplete_yhoCcfnksexDFbx:
addl %r8d,(%rdx)
movq %r8,%rax
.L_enc_dec_done_yhoCcfnksexDFbx:
leaq byte_len_to_mask_table(%rip),%r12
kmovw (%r12,%rax,2),%k1
vpshufb SHUF_MASK(%rip),%xmm3,%xmm3
vpshufb %xmm5,%xmm3,%xmm3
movq %r9,%r12
vmovdqu8 %xmm3,(%r12){%k1}
.L_partial_block_done_yhoCcfnksexDFbx:
vmovdqu64 0(%rsi),%xmm2
subq %rax,%r8
je .L_enc_dec_done_ralurfzeatcGxDF
cmpq $256,%r8
jbe .L_message_below_equal_16_blocks_ralurfzeatcGxDF
vmovdqa64 SHUF_MASK(%rip),%zmm29
vmovdqa64 ddq_addbe_4444(%rip),%zmm27
vmovdqa64 ddq_addbe_1234(%rip),%zmm28
vmovd %xmm2,%r15d
andl $255,%r15d
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpshufb %zmm29,%zmm2,%zmm2
// %r15b tracks the low byte of the big-endian counter; if generating the
// next 16 blocks would carry out of it, the overflow path byte-swaps to
// little-endian, increments, and swaps back.
cmpb $240,%r15b
jae .L_next_16_overflow_FolitFcvmzDtzbD
vpaddd %zmm28,%zmm2,%zmm7
vpaddd %zmm27,%zmm7,%zmm10
vpaddd %zmm27,%zmm10,%zmm11
vpaddd %zmm27,%zmm11,%zmm12
jmp .L_next_16_ok_FolitFcvmzDtzbD
.L_next_16_overflow_FolitFcvmzDtzbD:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm12
vpaddd ddq_add_1234(%rip),%zmm2,%zmm7
vpaddd %zmm12,%zmm7,%zmm10
vpaddd %zmm12,%zmm10,%zmm11
vpaddd %zmm12,%zmm11,%zmm12
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %zmm29,%zmm11,%zmm11
vpshufb %zmm29,%zmm12,%zmm12
.L_next_16_ok_FolitFcvmzDtzbD:
vshufi64x2 $255,%zmm12,%zmm12,%zmm2
addb $16,%r15b
vmovdqu8 0(%rcx,%rax,1),%zmm0
vmovdqu8 64(%rcx,%rax,1),%zmm3
vmovdqu8 128(%rcx,%rax,1),%zmm4
vmovdqu8 192(%rcx,%rax,1),%zmm5
vbroadcastf64x2 0(%rdi),%zmm6
vpxorq %zmm6,%zmm7,%zmm7
vpxorq %zmm6,%zmm10,%zmm10
vpxorq %zmm6,%zmm11,%zmm11
vpxorq %zmm6,%zmm12,%zmm12
vbroadcastf64x2 16(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 32(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 48(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 64(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 80(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 96(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 112(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 128(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 144(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 160(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 176(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 192(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 208(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 224(%rdi),%zmm6
.byte 98,242,69,72,221,254
.byte 98,114,45,72,221,214
.byte 98,114,37,72,221,222
.byte 98,114,29,72,221,230
vpxorq %zmm0,%zmm7,%zmm7
vpxorq %zmm3,%zmm10,%zmm10
vpxorq %zmm4,%zmm11,%zmm11
vpxorq %zmm5,%zmm12,%zmm12
movq %r9,%r10
vmovdqu8 %zmm7,0(%r10,%rax,1)
vmovdqu8 %zmm10,64(%r10,%rax,1)
vmovdqu8 %zmm11,128(%r10,%rax,1)
vmovdqu8 %zmm12,192(%r10,%rax,1)
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %zmm29,%zmm11,%zmm11
vpshufb %zmm29,%zmm12,%zmm12
vmovdqa64 %zmm7,768(%rsp)
vmovdqa64 %zmm10,832(%rsp)
vmovdqa64 %zmm11,896(%rsp)
vmovdqa64 %zmm12,960(%rsp)
leaq 80(%rsi),%r12
testq %r14,%r14
jnz .L_skip_hkeys_precomputation_slhsqgEufGclFec
vmovdqu64 192(%r12),%zmm0
vmovdqu64 %zmm0,704(%rsp)
vmovdqu64 128(%r12),%zmm3
vmovdqu64 %zmm3,640(%rsp)
vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3
vmovdqu64 64(%r12),%zmm4
vmovdqu64 %zmm4,576(%rsp)
vmovdqu64 0(%r12),%zmm5
vmovdqu64 %zmm5,512(%rsp)
.L_skip_hkeys_precomputation_slhsqgEufGclFec:
cmpq $512,%r8
jb .L_message_below_32_blocks_ralurfzeatcGxDF
cmpb $240,%r15b
jae .L_next_16_overflow_rpkeAoplfcmnoqe
vpaddd %zmm28,%zmm2,%zmm7
vpaddd %zmm27,%zmm7,%zmm10
vpaddd %zmm27,%zmm10,%zmm11
vpaddd %zmm27,%zmm11,%zmm12
jmp .L_next_16_ok_rpkeAoplfcmnoqe
.L_next_16_overflow_rpkeAoplfcmnoqe:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm12
vpaddd ddq_add_1234(%rip),%zmm2,%zmm7
vpaddd %zmm12,%zmm7,%zmm10
vpaddd %zmm12,%zmm10,%zmm11
vpaddd %zmm12,%zmm11,%zmm12
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %zmm29,%zmm11,%zmm11
vpshufb %zmm29,%zmm12,%zmm12
.L_next_16_ok_rpkeAoplfcmnoqe:
vshufi64x2 $255,%zmm12,%zmm12,%zmm2
addb $16,%r15b
vmovdqu8 256(%rcx,%rax,1),%zmm0
vmovdqu8 320(%rcx,%rax,1),%zmm3
vmovdqu8 384(%rcx,%rax,1),%zmm4
vmovdqu8 448(%rcx,%rax,1),%zmm5
vbroadcastf64x2 0(%rdi),%zmm6
vpxorq %zmm6,%zmm7,%zmm7
vpxorq %zmm6,%zmm10,%zmm10
vpxorq %zmm6,%zmm11,%zmm11
vpxorq %zmm6,%zmm12,%zmm12
vbroadcastf64x2 16(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 32(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 48(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 64(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 80(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 96(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 112(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 128(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 144(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 160(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 176(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 192(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 208(%rdi),%zmm6
.byte 98,242,69,72,220,254
.byte 98,114,45,72,220,214
.byte 98,114,37,72,220,222
.byte 98,114,29,72,220,230
vbroadcastf64x2 224(%rdi),%zmm6
.byte 98,242,69,72,221,254
.byte 98,114,45,72,221,214
.byte 98,114,37,72,221,222
.byte 98,114,29,72,221,230
vpxorq %zmm0,%zmm7,%zmm7
vpxorq %zmm3,%zmm10,%zmm10
vpxorq %zmm4,%zmm11,%zmm11
vpxorq %zmm5,%zmm12,%zmm12
movq %r9,%r10
vmovdqu8 %zmm7,256(%r10,%rax,1)
vmovdqu8 %zmm10,320(%r10,%rax,1)
vmovdqu8 %zmm11,384(%r10,%rax,1)
vmovdqu8 %zmm12,448(%r10,%rax,1)
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %zmm29,%zmm11,%zmm11
vpshufb %zmm29,%zmm12,%zmm12
vmovdqa64 %zmm7,1024(%rsp)
vmovdqa64 %zmm10,1088(%rsp)
vmovdqa64 %zmm11,1152(%rsp)
vmovdqa64 %zmm12,1216(%rsp)
testq %r14,%r14
jnz .L_skip_hkeys_precomputation_wDdhvEhGipECfzn
vmovdqu64 640(%rsp),%zmm3
vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3
vmovdqu64 576(%rsp),%zmm4
vmovdqu64 512(%rsp),%zmm5
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,448(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,384(%rsp)
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,320(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,256(%rsp)
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,192(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,128(%rsp)
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,64(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,0(%rsp)
.L_skip_hkeys_precomputation_wDdhvEhGipECfzn:
movq $1,%r14
addq $512,%rax
subq $512,%r8
cmpq $768,%r8
jb .L_no_more_big_nblocks_ralurfzeatcGxDF
.L_encrypt_big_nblocks_ralurfzeatcGxDF:
cmpb $240,%r15b
jae .L_16_blocks_overflow_tcpaCgCtyttEnkC
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
vpaddd %zmm27,%zmm4,%zmm5
jmp .L_16_blocks_ok_tcpaCgCtyttEnkC
.L_16_blocks_overflow_tcpaCgCtyttEnkC:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpaddd %zmm5,%zmm4,%zmm5
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
.L_16_blocks_ok_tcpaCgCtyttEnkC:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp),%zmm1
vshufi64x2 $255,%zmm5,%zmm5,%zmm2
addb $16,%r15b
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm30,%zmm5,%zmm5
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,243,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm6
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20
vmovdqu8 192(%rcx,%rax,1),%zmm21
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm15,%zmm10,%zmm26
vpxorq %zmm12,%zmm6,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
.byte 98,146,85,72,221,238
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vpxorq %zmm21,%zmm5,%zmm5
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1)
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqa64 %zmm0,1280(%rsp)
vmovdqa64 %zmm3,1344(%rsp)
vmovdqa64 %zmm4,1408(%rsp)
vmovdqa64 %zmm5,1472(%rsp)
cmpb $240,%r15b
jae .L_16_blocks_overflow_fefwzzFqtyGgFsy
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
vpaddd %zmm27,%zmm4,%zmm5
jmp .L_16_blocks_ok_fefwzzFqtyGgFsy
.L_16_blocks_overflow_fefwzzFqtyGgFsy:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpaddd %zmm5,%zmm4,%zmm5
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
.L_16_blocks_ok_fefwzzFqtyGgFsy:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 256(%rsp),%zmm1
vshufi64x2 $255,%zmm5,%zmm5,%zmm2
addb $16,%r15b
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 320(%rsp),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm30,%zmm5,%zmm5
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,243,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 384(%rsp),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 448(%rsp),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm6
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 256(%rcx,%rax,1),%zmm17
vmovdqu8 320(%rcx,%rax,1),%zmm19
vmovdqu8 384(%rcx,%rax,1),%zmm20
vmovdqu8 448(%rcx,%rax,1),%zmm21
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
vpternlogq $0x96,%zmm12,%zmm6,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
.byte 98,146,85,72,221,238
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vpxorq %zmm21,%zmm5,%zmm5
movq %r9,%r10
vmovdqu8 %zmm0,256(%r10,%rax,1)
vmovdqu8 %zmm3,320(%r10,%rax,1)
vmovdqu8 %zmm4,384(%r10,%rax,1)
vmovdqu8 %zmm5,448(%r10,%rax,1)
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqa64 %zmm0,768(%rsp)
vmovdqa64 %zmm3,832(%rsp)
vmovdqa64 %zmm4,896(%rsp)
vmovdqa64 %zmm5,960(%rsp)
cmpb $240,%r15b
jae .L_16_blocks_overflow_poCaishDCqiAtDd
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
vpaddd %zmm27,%zmm4,%zmm5
jmp .L_16_blocks_ok_poCaishDCqiAtDd
.L_16_blocks_overflow_poCaishDCqiAtDd:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpaddd %zmm5,%zmm4,%zmm5
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
.L_16_blocks_ok_poCaishDCqiAtDd:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1280(%rsp),%zmm8
vmovdqu64 512(%rsp),%zmm1
vshufi64x2 $255,%zmm5,%zmm5,%zmm2
addb $16,%r15b
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 576(%rsp),%zmm18
vmovdqa64 1344(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm30,%zmm5,%zmm5
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,243,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 640(%rsp),%zmm1
vmovdqa64 1408(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 704(%rsp),%zmm18
vmovdqa64 1472(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm6
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 512(%rcx,%rax,1),%zmm17
vmovdqu8 576(%rcx,%rax,1),%zmm19
vmovdqu8 640(%rcx,%rax,1),%zmm20
vmovdqu8 704(%rcx,%rax,1),%zmm21
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm15,%zmm26,%zmm10
vpsrldq $8,%zmm10,%zmm15
vpslldq $8,%zmm10,%zmm10
vmovdqa64 POLY2(%rip),%xmm16
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm31
vpternlogq $0x96,%zmm15,%zmm12,%zmm6
vpxorq %zmm24,%zmm6,%zmm6
vpternlogq $0x96,%zmm10,%zmm13,%zmm7
vpxorq %zmm25,%zmm7,%zmm7
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 160(%rdi),%zmm30
vextracti64x4 $1,%zmm6,%ymm12
vpxorq %ymm12,%ymm6,%ymm6
vextracti32x4 $1,%ymm6,%xmm12
vpxorq %xmm12,%xmm6,%xmm6
vextracti64x4 $1,%zmm7,%ymm13
vpxorq %ymm13,%ymm7,%ymm7
vextracti32x4 $1,%ymm7,%xmm13
vpxorq %xmm13,%xmm7,%xmm7
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,115,125,0,68,239,1
vpslldq $8,%xmm13,%xmm13
vpxorq %xmm13,%xmm7,%xmm13
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
.byte 98,83,125,0,68,229,0
vpsrldq $4,%xmm12,%xmm12
.byte 98,83,125,0,68,253,16
vpslldq $4,%xmm15,%xmm15
vpternlogq $0x96,%xmm12,%xmm15,%xmm6
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
.byte 98,146,85,72,221,238
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vpxorq %zmm21,%zmm5,%zmm5
movq %r9,%r10
vmovdqu8 %zmm0,512(%r10,%rax,1)
vmovdqu8 %zmm3,576(%r10,%rax,1)
vmovdqu8 %zmm4,640(%r10,%rax,1)
vmovdqu8 %zmm5,704(%r10,%rax,1)
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqa64 %zmm0,1024(%rsp)
vmovdqa64 %zmm3,1088(%rsp)
vmovdqa64 %zmm4,1152(%rsp)
vmovdqa64 %zmm5,1216(%rsp)
vmovdqa64 %zmm6,%zmm14
addq $768,%rax
subq $768,%r8
cmpq $768,%r8
jae .L_encrypt_big_nblocks_ralurfzeatcGxDF
.L_no_more_big_nblocks_ralurfzeatcGxDF:
cmpq $512,%r8
jae .L_encrypt_32_blocks_ralurfzeatcGxDF
cmpq $256,%r8
jae .L_encrypt_16_blocks_ralurfzeatcGxDF
.L_encrypt_0_blocks_ghash_32_ralurfzeatcGxDF:
movl %r8d,%r10d
andl $~15,%r10d
movl $256,%ebx
subl %r10d,%ebx
vmovdqa64 768(%rsp),%zmm13
vpxorq %zmm14,%zmm13,%zmm13
vmovdqu64 0(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 832(%rsp),%zmm13
vmovdqu64 64(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpxorq %zmm10,%zmm4,%zmm26
vpxorq %zmm6,%zmm0,%zmm24
vpxorq %zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vmovdqa64 896(%rsp),%zmm13
vmovdqu64 128(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 960(%rsp),%zmm13
vmovdqu64 192(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
addl $256,%ebx
movl %r8d,%r10d
addl $15,%r10d
shrl $4,%r10d
je .L_last_num_blocks_is_0_xdvkfswEyEirzwB
// Branch tree dispatching on the number of remaining blocks (1..16).
cmpl $8,%r10d
je .L_last_num_blocks_is_8_xdvkfswEyEirzwB
jb .L_last_num_blocks_is_7_1_xdvkfswEyEirzwB
cmpl $12,%r10d
je .L_last_num_blocks_is_12_xdvkfswEyEirzwB
jb .L_last_num_blocks_is_11_9_xdvkfswEyEirzwB
cmpl $15,%r10d
je .L_last_num_blocks_is_15_xdvkfswEyEirzwB
ja .L_last_num_blocks_is_16_xdvkfswEyEirzwB
cmpl $14,%r10d
je .L_last_num_blocks_is_14_xdvkfswEyEirzwB
jmp .L_last_num_blocks_is_13_xdvkfswEyEirzwB
.L_last_num_blocks_is_11_9_xdvkfswEyEirzwB:
cmpl $10,%r10d
je .L_last_num_blocks_is_10_xdvkfswEyEirzwB
ja .L_last_num_blocks_is_11_xdvkfswEyEirzwB
jmp .L_last_num_blocks_is_9_xdvkfswEyEirzwB
.L_last_num_blocks_is_7_1_xdvkfswEyEirzwB:
cmpl $4,%r10d
je .L_last_num_blocks_is_4_xdvkfswEyEirzwB
jb .L_last_num_blocks_is_3_1_xdvkfswEyEirzwB
cmpl $6,%r10d
ja .L_last_num_blocks_is_7_xdvkfswEyEirzwB
je .L_last_num_blocks_is_6_xdvkfswEyEirzwB
jmp .L_last_num_blocks_is_5_xdvkfswEyEirzwB
.L_last_num_blocks_is_3_1_xdvkfswEyEirzwB:
cmpl $2,%r10d
ja .L_last_num_blocks_is_3_xdvkfswEyEirzwB
je .L_last_num_blocks_is_2_xdvkfswEyEirzwB
.L_last_num_blocks_is_1_xdvkfswEyEirzwB:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_igvodhikativhxs
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_igvodhikativhxs
.L_16_blocks_overflow_igvodhikativhxs:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_igvodhikativhxs:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %xmm29,%xmm0,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_GcsipbkriaBjvfi
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_GcsipbkriaBjvfi
.L_small_initial_partial_block_GcsipbkriaBjvfi:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm0
.byte 98,147,125,8,68,217,1
vpslldq $8,%xmm3,%xmm3
vpxorq %xmm3,%xmm25,%xmm3
.byte 98,243,125,8,68,227,0
vpsrldq $4,%xmm4,%xmm4
.byte 98,115,125,8,68,243,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm4,%xmm14
vpxorq %xmm7,%xmm14,%xmm14
jmp .L_after_reduction_GcsipbkriaBjvfi
.L_small_initial_compute_done_GcsipbkriaBjvfi:
.L_after_reduction_GcsipbkriaBjvfi:
jmp .L_last_blocks_done_xdvkfswEyEirzwB
.L_last_num_blocks_is_2_xdvkfswEyEirzwB:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $254,%r15d
jae .L_16_blocks_overflow_vsprwaoekjwbkng
vpaddd %ymm28,%ymm2,%ymm0
jmp .L_16_blocks_ok_vsprwaoekjwbkng
.L_16_blocks_overflow_vsprwaoekjwbkng:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %ymm29,%ymm0,%ymm0
.L_16_blocks_ok_vsprwaoekjwbkng:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %ymm30,%ymm0,%ymm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,40,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,40,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,40,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,40,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte 98,146,125,40,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,40,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,40,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,40,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,40,220,199
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,40,220,199
.byte 98,146,125,40,221,198
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %ymm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %ymm29,%ymm0,%ymm17
vextracti32x4 $1,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (2 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_lhbsspkwfiDtCyr
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_lhbsspkwfiDtCyr
.L_small_initial_partial_block_lhbsspkwfiDtCyr:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_lhbsspkwfiDtCyr:
orq %r8,%r8
je .L_after_reduction_lhbsspkwfiDtCyr
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_lhbsspkwfiDtCyr:
jmp .L_last_blocks_done_xdvkfswEyEirzwB
.L_last_num_blocks_is_3_xdvkfswEyEirzwB:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $253,%r15d
jae .L_16_blocks_overflow_pdiFfjCElAtekEv
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_pdiFfjCElAtekEv
.L_16_blocks_overflow_pdiFfjCElAtekEv:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_pdiFfjCElAtekEv:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iyftGziCGvzBGwp subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iyftGziCGvzBGwp .L_small_initial_partial_block_iyftGziCGvzBGwp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iyftGziCGvzBGwp: orq %r8,%r8 je .L_after_reduction_iyftGziCGvzBGwp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iyftGziCGvzBGwp: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_4_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_giftEyoltvfgggA vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_giftEyoltvfgggA 
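// Counter-increment slow path. The counter blocks in %zmm2 are kept in
// AES byte order, so the fast path above can vpaddd the pre-positioned
// increments (%zmm28/%zmm27) only while the low counter byte cannot
// carry out; the cmpl against 256 - N just before the branch catches the
// wrap. Here the counters are byte-swapped with vpshufb %zmm29, bumped
// via ddq_add_1234 / ddq_add_4444 with full 32-bit carry, and swapped
// back.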
.L_16_blocks_overflow_giftEyoltvfgggA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_giftEyoltvfgggA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hdzCnewjxBbishd subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hdzCnewjxBbishd .L_small_initial_partial_block_hdzCnewjxBbishd: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 
98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hdzCnewjxBbishd: orq %r8,%r8 je .L_after_reduction_hdzCnewjxBbishd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hdzCnewjxBbishd: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_5_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_orpkewzlnxCGshz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_orpkewzlnxCGshz .L_16_blocks_overflow_orpkewzlnxCGshz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_orpkewzlnxCGshz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 
98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kyaoueFfnBudEhA subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kyaoueFfnBudEhA .L_small_initial_partial_block_kyaoueFfnBudEhA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kyaoueFfnBudEhA: orq %r8,%r8 je .L_after_reduction_kyaoueFfnBudEhA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kyaoueFfnBudEhA: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_6_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_orictFjAdfigdzk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_orictFjAdfigdzk .L_16_blocks_overflow_orictFjAdfigdzk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_orictFjAdfigdzk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 
0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sgBbGfbjnccbnkh subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sgBbGfbjnccbnkh .L_small_initial_partial_block_sgBbGfbjnccbnkh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sgBbGfbjnccbnkh: orq %r8,%r8 je .L_after_reduction_sgBbGfbjnccbnkh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sgBbGfbjnccbnkh: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_7_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_ivtabDnDqnrGEcy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ivtabDnDqnrGEcy .L_16_blocks_overflow_ivtabDnDqnrGEcy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ivtabDnDqnrGEcy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FwberbenvBxEcDE subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FwberbenvBxEcDE .L_small_initial_partial_block_FwberbenvBxEcDE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FwberbenvBxEcDE: orq %r8,%r8 je .L_after_reduction_FwberbenvBxEcDE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FwberbenvBxEcDE: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_8_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_uBiojDdgtEoAfGd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_uBiojDdgtEoAfGd .L_16_blocks_overflow_uBiojDdgtEoAfGd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_uBiojDdgtEoAfGd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb 
%zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_twDrbrvhowngEDr subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_twDrbrvhowngEDr .L_small_initial_partial_block_twDrbrvhowngEDr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_twDrbrvhowngEDr: orq %r8,%r8 je .L_after_reduction_twDrbrvhowngEDr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_twDrbrvhowngEDr: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_9_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_FqperxgfhBwCqDo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_FqperxgfhBwCqDo .L_16_blocks_overflow_FqperxgfhBwCqDo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_FqperxgfhBwCqDo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 
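// Nine residual blocks need a third counter vector: %zmm0 and %zmm3 carry
// blocks 1-8, and only the low 128-bit lane of %zmm4 is live, so the
// ninth block is whitened with round key 0 and pipelined through the
// same rounds as the two full-width streams using xmm-width vaesenc
// byte sequences.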
vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_agoyuAkiGwzDjns subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 
98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_agoyuAkiGwzDjns .L_small_initial_partial_block_agoyuAkiGwzDjns: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_agoyuAkiGwzDjns: orq %r8,%r8 je .L_after_reduction_agoyuAkiGwzDjns vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_agoyuAkiGwzDjns: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_10_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_bvimoanuboioxom vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_bvimoanuboioxom .L_16_blocks_overflow_bvimoanuboioxom: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_bvimoanuboioxom: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 
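// While the AES rounds run, the GHASH side streams in operands staged
// earlier in this function's stack frame (0..192(%rsp,%rbx,1) and
// 1024..1216(%rsp)) and collects the four vpclmulqdq partial products
// per stream - hi, lo and the two cross terms, selected by imm8 0x11,
// 0x00, 0x01 and 0x10 in the byte-encoded forms.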
vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kblafGutCsvisjA subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kblafGutCsvisjA .L_small_initial_partial_block_kblafGutCsvisjA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kblafGutCsvisjA: orq %r8,%r8 je .L_after_reduction_kblafGutCsvisjA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kblafGutCsvisjA: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_11_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_DcdigDqdkAmpala vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_DcdigDqdkAmpala .L_16_blocks_overflow_DcdigDqdkAmpala: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_DcdigDqdkAmpala: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 
98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lFojEFrDvGhrqGC subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lFojEFrDvGhrqGC .L_small_initial_partial_block_lFojEFrDvGhrqGC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lFojEFrDvGhrqGC: orq %r8,%r8 je .L_after_reduction_lFojEFrDvGhrqGC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lFojEFrDvGhrqGC: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_12_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_ijmafkyicqbAgov vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_ijmafkyicqbAgov .L_16_blocks_overflow_ijmafkyicqbAgov: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_ijmafkyicqbAgov: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 
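// vpternlogq with imm8 0x96 computes a three-way XOR (dst = a ^ b ^ c);
// each of the four instructions in this cluster merges one set of
// per-stream vpclmulqdq partial products into its running GHASH
// accumulator in a single step.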
vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lbhyvEuvxtzgCqA subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lbhyvEuvxtzgCqA .L_small_initial_partial_block_lbhyvEuvxtzgCqA: movl %r8d,(%rdx) 
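// Partial final block: the leftover byte count in %r8 was just written
// back through %rdx (the full-block path stores 0 there instead), and the
// last counter block in %xmm11 is saved at 16(%rsi) as the partial-block
// state.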
vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lbhyvEuvxtzgCqA: orq %r8,%r8 je .L_after_reduction_lbhyvEuvxtzgCqA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lbhyvEuvxtzgCqA: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_13_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_xewjdgAADiucjCd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_xewjdgAADiucjCd .L_16_blocks_overflow_xewjdgAADiucjCd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_xewjdgAADiucjCd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 
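// Same three-way-XOR merge as in the shorter tails, now in the 13-block
// path, where a fourth AES stream (%xmm5, the 13th block) rides along
// with the three zmm streams through the full 0..224(%rdi) key schedule
// (fourteen rounds, ending in vaesenclast).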
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ihgbCttclcmDtmF subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 
vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ihgbCttclcmDtmF .L_small_initial_partial_block_ihgbCttclcmDtmF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ihgbCttclcmDtmF: orq %r8,%r8 je .L_after_reduction_ihgbCttclcmDtmF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ihgbCttclcmDtmF: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_14_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_uxvkthhndspgdct vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_uxvkthhndspgdct .L_16_blocks_overflow_uxvkthhndspgdct: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_uxvkthhndspgdct: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 
192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_okkABmocyzkldgz subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 
98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_okkABmocyzkldgz .L_small_initial_partial_block_okkABmocyzkldgz: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_okkABmocyzkldgz: orq %r8,%r8 je .L_after_reduction_okkABmocyzkldgz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_okkABmocyzkldgz: jmp .L_last_blocks_done_xdvkfswEyEirzwB .L_last_num_blocks_is_15_xdvkfswEyEirzwB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_fdeajBtuhuyobdz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_fdeajBtuhuyobdz .L_16_blocks_overflow_fdeajBtuhuyobdz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_fdeajBtuhuyobdz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 
vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb 
%zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ltEnnExvFfBwyxa subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ltEnnExvFfBwyxa .L_small_initial_partial_block_ltEnnExvFfBwyxa: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ltEnnExvFfBwyxa: orq %r8,%r8 je .L_after_reduction_ltEnnExvFfBwyxa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ltEnnExvFfBwyxa: jmp .L_last_blocks_done_xdvkfswEyEirzwB 
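// Tail handler for exactly 16 remaining blocks. Like the smaller cases
// above, it builds four zmm counter blocks (taking the byte-swapped
// overflow path when the low counter byte would wrap), runs the AES rounds
// with the keys at 0(%rdi)..224(%rdi), and masks the final store with %k1
// from byte64_len_to_mask_table. The .byte runs here and throughout appear
// to be EVEX-encoded VAESENC/VAESENCLAST (0F38 0xDC/0xDD) and VPCLMULQDQ
// (0F3A 0x44) instructions emitted as raw bytes for assemblers that cannot
// encode them directly.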
.L_last_num_blocks_is_16_xdvkfswEyEirzwB:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $192,%r11
kmovq (%r10,%r11,8),%k1
cmpl $240,%r15d
jae .L_16_blocks_overflow_mxnyyrjuxpBhloh
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
vpaddd %zmm27,%zmm4,%zmm5
jmp .L_16_blocks_ok_mxnyyrjuxpBhloh
.L_16_blocks_overflow_mxnyyrjuxpBhloh:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpaddd %zmm5,%zmm4,%zmm5
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
.L_16_blocks_ok_mxnyyrjuxpBhloh:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm5,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm30,%zmm5,%zmm5
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20
vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
.byte 98,146,85,72,221,238
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vpxorq %zmm21,%zmm5,%zmm5
vextracti32x4 $3,%zmm5,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %zmm29,%zmm3,%zmm19
vpshufb %zmm29,%zmm4,%zmm20
vpshufb %zmm29,%zmm5,%zmm21
vextracti32x4 $3,%zmm21,%xmm7
leaq 80(%rsi),%r10
subq $16 * (16 - 1),%r8
.L_small_initial_partial_block_CFywctAlrBmkufB:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 16(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 80(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vmovdqu64 144(%r10),%zmm1
.byte 98,227,93,64,68,201,17
.byte 98,227,93,64,68,217,0
vpternlogq $0x96,%zmm0,%zmm17,%zmm8
vpternlogq $0x96,%zmm3,%zmm19,%zmm22
.byte 98,227,93,64,68,201,1
.byte 98,227,93,64,68,217,16
vpternlogq $0x96,%zmm4,%zmm17,%zmm30
vpternlogq $0x96,%zmm5,%zmm19,%zmm31
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,85,64,68,225,1
.byte 98,243,85,64,68,233,16
.byte 98,243,85,64,68,193,17
.byte 98,243,85,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_CFywctAlrBmkufB:
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_CFywctAlrBmkufB:
jmp .L_last_blocks_done_xdvkfswEyEirzwB
.L_last_num_blocks_is_0_xdvkfswEyEirzwB:
vmovdqa64 1024(%rsp),%zmm13
vmovdqu64 0(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 1088(%rsp),%zmm13
vmovdqu64 64(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vmovdqa64 1152(%rsp),%zmm13
vmovdqu64 128(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 1216(%rsp),%zmm13
vmovdqu64 192(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm4
.byte 98,147,93,8,68,193,1
vpslldq $8,%xmm0,%xmm0
vpxorq %xmm0,%xmm25,%xmm0
.byte 98,243,93,8,68,216,0
vpsrldq $4,%xmm3,%xmm3
.byte 98,115,93,8,68,240,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm3,%xmm14
.L_last_blocks_done_xdvkfswEyEirzwB:
vpshufb %xmm29,%xmm2,%xmm2
jmp .L_ghash_done_ralurfzeatcGxDF
.L_encrypt_32_blocks_ralurfzeatcGxDF:
cmpb $240,%r15b
jae .L_16_blocks_overflow_maxnEmGesnybyGw
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
vpaddd %zmm27,%zmm4,%zmm5
jmp .L_16_blocks_ok_maxnEmGesnybyGw
.L_16_blocks_overflow_maxnEmGesnybyGw:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpaddd %zmm5,%zmm4,%zmm5
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
.L_16_blocks_ok_maxnEmGesnybyGw:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp),%zmm1
vshufi64x2 $255,%zmm5,%zmm5,%zmm2
addb $16,%r15b
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm30,%zmm5,%zmm5
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,243,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm6
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20
vmovdqu8 192(%rcx,%rax,1),%zmm21
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm15,%zmm10,%zmm26
vpxorq %zmm12,%zmm6,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
.byte 98,146,85,72,221,238
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vpxorq %zmm21,%zmm5,%zmm5
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1)
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqa64 %zmm0,1280(%rsp)
vmovdqa64 %zmm3,1344(%rsp)
vmovdqa64 %zmm4,1408(%rsp)
vmovdqa64 %zmm5,1472(%rsp)
cmpb $240,%r15b
jae .L_16_blocks_overflow_caDkotybClbwqcs
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
vpaddd %zmm27,%zmm4,%zmm5
jmp .L_16_blocks_ok_caDkotybClbwqcs
.L_16_blocks_overflow_caDkotybClbwqcs:
vpshufb %zmm29,%zmm2,%zmm2
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpaddd %zmm5,%zmm4,%zmm5
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
.L_16_blocks_ok_caDkotybClbwqcs:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 256(%rsp),%zmm1
vshufi64x2 $255,%zmm5,%zmm5,%zmm2
addb $16,%r15b
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 320(%rsp),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm30,%zmm5,%zmm5
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,243,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 384(%rsp),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 448(%rsp),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm6
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 256(%rcx,%rax,1),%zmm17
vmovdqu8 320(%rcx,%rax,1),%zmm19
vmovdqu8 384(%rcx,%rax,1),%zmm20
vmovdqu8 448(%rcx,%rax,1),%zmm21
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
vpternlogq $0x96,%zmm12,%zmm6,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
.byte 98,146,85,72,220,238
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,85,72,220,239
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
.byte 98,146,85,72,221,238
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vpxorq %zmm21,%zmm5,%zmm5
movq %r9,%r10
vmovdqu8 %zmm0,256(%r10,%rax,1)
vmovdqu8 %zmm3,320(%r10,%rax,1)
vmovdqu8 %zmm4,384(%r10,%rax,1)
vmovdqu8 %zmm5,448(%r10,%rax,1)
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqa64 %zmm0,768(%rsp)
vmovdqa64 %zmm3,832(%rsp)
vmovdqa64 %zmm4,896(%rsp)
vmovdqa64 %zmm5,960(%rsp)
vmovdqa64 1280(%rsp),%zmm13
vmovdqu64 512(%rsp),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 1344(%rsp),%zmm13
vmovdqu64 576(%rsp),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vmovdqa64 1408(%rsp),%zmm13
vmovdqu64 640(%rsp),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 1472(%rsp),%zmm13
vmovdqu64 704(%rsp),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm4
.byte 98,147,93,8,68,193,1
vpslldq $8,%xmm0,%xmm0
vpxorq %xmm0,%xmm25,%xmm0
.byte 98,243,93,8,68,216,0
vpsrldq $4,%xmm3,%xmm3
.byte 98,115,93,8,68,240,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm3,%xmm14
subq $512,%r8
addq $512,%rax
movl %r8d,%r10d
andl $~15,%r10d
movl $512,%ebx
subl %r10d,%ebx
movl %r8d,%r10d
addl $15,%r10d
shrl $4,%r10d
je .L_last_num_blocks_is_0_CpuqkplkrGqAlEE
cmpl $8,%r10d
je .L_last_num_blocks_is_8_CpuqkplkrGqAlEE
jb .L_last_num_blocks_is_7_1_CpuqkplkrGqAlEE
cmpl $12,%r10d
je .L_last_num_blocks_is_12_CpuqkplkrGqAlEE
jb .L_last_num_blocks_is_11_9_CpuqkplkrGqAlEE
cmpl $15,%r10d
je .L_last_num_blocks_is_15_CpuqkplkrGqAlEE
ja .L_last_num_blocks_is_16_CpuqkplkrGqAlEE
cmpl $14,%r10d
je .L_last_num_blocks_is_14_CpuqkplkrGqAlEE
jmp .L_last_num_blocks_is_13_CpuqkplkrGqAlEE
.L_last_num_blocks_is_11_9_CpuqkplkrGqAlEE:
cmpl $10,%r10d
je .L_last_num_blocks_is_10_CpuqkplkrGqAlEE
ja .L_last_num_blocks_is_11_CpuqkplkrGqAlEE
jmp .L_last_num_blocks_is_9_CpuqkplkrGqAlEE
.L_last_num_blocks_is_7_1_CpuqkplkrGqAlEE:
cmpl $4,%r10d
je .L_last_num_blocks_is_4_CpuqkplkrGqAlEE
jb .L_last_num_blocks_is_3_1_CpuqkplkrGqAlEE
cmpl $6,%r10d
ja .L_last_num_blocks_is_7_CpuqkplkrGqAlEE
je .L_last_num_blocks_is_6_CpuqkplkrGqAlEE
jmp .L_last_num_blocks_is_5_CpuqkplkrGqAlEE
.L_last_num_blocks_is_3_1_CpuqkplkrGqAlEE:
cmpl $2,%r10d
ja .L_last_num_blocks_is_3_CpuqkplkrGqAlEE
je .L_last_num_blocks_is_2_CpuqkplkrGqAlEE
.L_last_num_blocks_is_1_CpuqkplkrGqAlEE:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_alDzwCfDlrwfuue
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_alDzwCfDlrwfuue
.L_16_blocks_overflow_alDzwCfDlrwfuue:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_alDzwCfDlrwfuue:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %xmm29,%xmm0,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_iBlFpkcubprtgpj
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_iBlFpkcubprtgpj
.L_small_initial_partial_block_iBlFpkcubprtgpj:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm0
.byte 98,147,125,8,68,217,1
vpslldq $8,%xmm3,%xmm3
vpxorq %xmm3,%xmm25,%xmm3
.byte 98,243,125,8,68,227,0
vpsrldq $4,%xmm4,%xmm4
.byte 98,115,125,8,68,243,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm4,%xmm14
vpxorq %xmm7,%xmm14,%xmm14
jmp .L_after_reduction_iBlFpkcubprtgpj
.L_small_initial_compute_done_iBlFpkcubprtgpj:
.L_after_reduction_iBlFpkcubprtgpj:
jmp .L_last_blocks_done_CpuqkplkrGqAlEE
.L_last_num_blocks_is_2_CpuqkplkrGqAlEE:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $254,%r15d
jae .L_16_blocks_overflow_nCqcfaumojsjgbp
vpaddd %ymm28,%ymm2,%ymm0
jmp .L_16_blocks_ok_nCqcfaumojsjgbp
.L_16_blocks_overflow_nCqcfaumojsjgbp:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %ymm29,%ymm0,%ymm0
.L_16_blocks_ok_nCqcfaumojsjgbp:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %ymm30,%ymm0,%ymm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,40,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,40,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,40,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,40,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte 98,146,125,40,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,40,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,40,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,40,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,40,220,199
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,40,220,199
.byte 98,146,125,40,221,198
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %ymm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %ymm29,%ymm0,%ymm17
vextracti32x4 $1,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (2 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_yceinkEjzFdqAeG
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_yceinkEjzFdqAeG
.L_small_initial_partial_block_yceinkEjzFdqAeG:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_yceinkEjzFdqAeG:
orq %r8,%r8
je .L_after_reduction_yceinkEjzFdqAeG
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_yceinkEjzFdqAeG:
jmp .L_last_blocks_done_CpuqkplkrGqAlEE
.L_last_num_blocks_is_3_CpuqkplkrGqAlEE:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $253,%r15d
jae .L_16_blocks_overflow_uwpbmorybawstbl
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_uwpbmorybawstbl
.L_16_blocks_overflow_uwpbmorybawstbl:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_uwpbmorybawstbl:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $2,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vextracti32x4 $2,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (3 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_sCduuopFvdCBjgG
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_sCduuopFvdCBjgG
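// Each small-tail case ends with two GHASH variants: the path above handles
// a block-aligned tail, while the _small_initial_partial_block_ path below
// records the leftover byte count at (%rdx), saves the last block at
// 16(%rsi), and appears to use hash keys shifted by one slot so the partial
// block can be completed on a later call.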
.L_small_initial_partial_block_sCduuopFvdCBjgG:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_sCduuopFvdCBjgG:
orq %r8,%r8
je .L_after_reduction_sCduuopFvdCBjgG
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_sCduuopFvdCBjgG:
jmp .L_last_blocks_done_CpuqkplkrGqAlEE
.L_last_num_blocks_is_4_CpuqkplkrGqAlEE:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $252,%r15d
jae .L_16_blocks_overflow_vadkquwycFnaotd
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_vadkquwycFnaotd
.L_16_blocks_overflow_vadkquwycFnaotd:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_vadkquwycFnaotd:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $3,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vextracti32x4 $3,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (4 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_ivhaorpFqBawvwj
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_ivhaorpFqBawvwj
.L_small_initial_partial_block_ivhaorpFqBawvwj:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_ivhaorpFqBawvwj:
orq %r8,%r8
je .L_after_reduction_ivhaorpFqBawvwj
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_ivhaorpFqBawvwj:
jmp .L_last_blocks_done_CpuqkplkrGqAlEE
.L_last_num_blocks_is_5_CpuqkplkrGqAlEE:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $251,%r15d
jae .L_16_blocks_overflow_aFkFaFcofvloukl
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %xmm27,%xmm0,%xmm3
jmp .L_16_blocks_ok_aFkFaFcofvloukl
.L_16_blocks_overflow_aFkFaFcofvloukl:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %xmm29,%xmm3,%xmm3
.L_16_blocks_ok_aFkFaFcofvloukl:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %xmm30,%xmm3,%xmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 208(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,8,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %xmm19,%xmm3,%xmm3
vextracti32x4 $0,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %xmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm0,%zmm17
vpshufb %xmm29,%xmm3,%xmm19
vextracti32x4 $0,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (5 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_DnveyAaCeDgzdCr
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_DnveyAaCeDgzdCr
.L_small_initial_partial_block_DnveyAaCeDgzdCr:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq
%zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DnveyAaCeDgzdCr: orq %r8,%r8 je .L_after_reduction_DnveyAaCeDgzdCr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DnveyAaCeDgzdCr: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_6_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_hyGBuzayqDhhsut vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_hyGBuzayqDhhsut .L_16_blocks_overflow_hyGBuzayqDhhsut: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_hyGBuzayqDhhsut: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 
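// Annotation (editorial, hedged): the paired ".byte" sequences interleaved
// with the vbroadcastf64x2 round-key loads appear to decode to AES round
// instructions on the counter-block vectors, e.g.
//   .byte 98,146,125,72,220,199  ~  vaesenc %zmm31,%zmm0,%zmm0
//   .byte 98,146,101,40,220,223  ~  vaesenc %ymm31,%ymm3,%ymm3
// (opcode byte 221 in the same pattern would be vaesenclast, the final
// round), so the full and partial counter-block vectors are encrypted in
// lockstep while the GHASH multiplies proceed in between.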
.byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FildbillAFDaont subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FildbillAFDaont .L_small_initial_partial_block_FildbillAFDaont: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FildbillAFDaont: orq %r8,%r8 je .L_after_reduction_FildbillAFDaont vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FildbillAFDaont: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_7_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_wfwrxhyCBsGqfaa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_wfwrxhyCBsGqfaa .L_16_blocks_overflow_wfwrxhyCBsGqfaa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_wfwrxhyCBsGqfaa: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
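// Annotation (editorial, hedged): each ".L_last_num_blocks_is_N" handler
// begins by testing whether the low 32-bit counter word can wrap within
// the next N blocks (cmpl $(256-N),%r15d; here N = 7, hence $249). The
// fast path adds the precomputed big-endian increments held in
// %zmm28/%zmm27 directly; the overflow path byte-swaps with %zmm29,
// increments in little-endian form via ddq_add_1234/ddq_add_4444, and
// swaps back before encryption.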
vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dFDhkscmwibqAtn subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dFDhkscmwibqAtn .L_small_initial_partial_block_dFDhkscmwibqAtn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dFDhkscmwibqAtn: orq %r8,%r8 je .L_after_reduction_dFDhkscmwibqAtn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dFDhkscmwibqAtn: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_8_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_nwCspduhyDCpabc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_nwCspduhyDCpabc .L_16_blocks_overflow_nwCspduhyDCpabc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_nwCspduhyDCpabc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 
98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_atEEroEqtkbEDxn subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_atEEroEqtkbEDxn .L_small_initial_partial_block_atEEroEqtkbEDxn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_atEEroEqtkbEDxn: orq %r8,%r8 je .L_after_reduction_atEEroEqtkbEDxn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_atEEroEqtkbEDxn: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_9_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_FtfeaayDywckyfd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_FtfeaayDywckyfd .L_16_blocks_overflow_FtfeaayDywckyfd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_FtfeaayDywckyfd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nvbkpkefGjFjFfs subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nvbkpkefGjFjFfs .L_small_initial_partial_block_nvbkpkefGjFjFfs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nvbkpkefGjFjFfs: orq %r8,%r8 je .L_after_reduction_nvbkpkefGjFjFfs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nvbkpkefGjFjFfs: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_10_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_rwkpzgCdusgbwpC vpaddd %zmm28,%zmm2,%zmm0 vpaddd 
%zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_rwkpzgCdusgbwpC .L_16_blocks_overflow_rwkpzgCdusgbwpC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_rwkpzgCdusgbwpC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb 
%ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tEmDckpEuqBsraf subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tEmDckpEuqBsraf .L_small_initial_partial_block_tEmDckpEuqBsraf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tEmDckpEuqBsraf: orq %r8,%r8 je .L_after_reduction_tEmDckpEuqBsraf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tEmDckpEuqBsraf: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_11_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_lwGByppsljaznxt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_lwGByppsljaznxt .L_16_blocks_overflow_lwGByppsljaznxt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_lwGByppsljaznxt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ilixxtsukzdoAtA subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 
98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ilixxtsukzdoAtA .L_small_initial_partial_block_ilixxtsukzdoAtA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ilixxtsukzdoAtA: orq %r8,%r8 je .L_after_reduction_ilixxtsukzdoAtA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ilixxtsukzdoAtA: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_12_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_jbqznyehrlCBlqk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_jbqznyehrlCBlqk .L_16_blocks_overflow_jbqznyehrlCBlqk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_jbqznyehrlCBlqk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wctpdEkyEmpBhlB subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 
98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wctpdEkyEmpBhlB .L_small_initial_partial_block_wctpdEkyEmpBhlB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wctpdEkyEmpBhlB: orq %r8,%r8 je .L_after_reduction_wctpdEkyEmpBhlB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wctpdEkyEmpBhlB: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_13_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_zfoiakgFjhncFgz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_zfoiakgFjhncFgz .L_16_blocks_overflow_zfoiakgFjhncFgz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_zfoiakgFjhncFgz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ECBllyApvBoFquD 
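// Annotation (editorial, hedged): at this point %r8 holds the bytes that
// remain once the first N-1 blocks of this tail are accounted for. If a
// whole 16-byte block remains, the branch above falls through and all N
// blocks are hashed now; otherwise the partial-block path records the
// residual byte count at (%rdx) and saves the last counter block (%xmm11)
// at 16(%rsi) so the partial block can be folded into GHASH on a later
// call.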
subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ECBllyApvBoFquD .L_small_initial_partial_block_ECBllyApvBoFquD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ECBllyApvBoFquD: orq %r8,%r8 je .L_after_reduction_ECBllyApvBoFquD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ECBllyApvBoFquD: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_14_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_boaouDrBeEmAnwp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_boaouDrBeEmAnwp .L_16_blocks_overflow_boaouDrBeEmAnwp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 
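// Annotation (editorial, hedged): this overflow path operates on
// byte-swapped (little-endian) counters: ddq_add_1234 appears to advance
// the four lanes of the first vector by 1..4, and each further
// "vpaddd ddq_add_4444" step, like the two surrounding this comment,
// derives the next vector of four counter blocks by adding 4 per lane.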
vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_boaouDrBeEmAnwp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_CjBwxsGswEoCtpA subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CjBwxsGswEoCtpA .L_small_initial_partial_block_CjBwxsGswEoCtpA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
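/*
 * Annotation: each tail handler guards its counter increment with
 * cmpl $(256 - N),%r15d, where N is the block count and %r15 tracks the
 * low byte of the big-endian counter.  If adding N would carry out of
 * that byte, the overflow path byte-swaps the counter (vpshufb %zmm29),
 * adds the ddq_add_1234/ddq_add_4444 vectors in little-endian form and
 * swaps back; otherwise the increments in %zmm27/%zmm28, which are
 * evidently valid while no carry occurs, are added directly.
 */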
.L_small_initial_compute_done_CjBwxsGswEoCtpA: orq %r8,%r8 je .L_after_reduction_CjBwxsGswEoCtpA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_CjBwxsGswEoCtpA: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_15_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_mFdcfdxbaoeAcmw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_mFdcfdxbaoeAcmw .L_16_blocks_overflow_mFdcfdxbaoeAcmw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_mFdcfdxbaoeAcmw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 
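/*
 * Annotation: after the tail blocks are produced, cmpq $16,%r8 selects
 * one of two GHASH epilogues: the full-block path hashes all N
 * ciphertext blocks immediately and clears the residual count at (%rdx),
 * while the _small_initial_partial_block_ path hashes only N-1 blocks
 * and saves the residual length to (%rdx) and the final block to
 * 16(%rsi), presumably so a later call can complete the partial block.
 */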
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nkpoxiswyhgqlsf subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nkpoxiswyhgqlsf .L_small_initial_partial_block_nkpoxiswyhgqlsf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 
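/*
 * Annotation: %r10 = 80(%rsi) points at the table of hash-key powers,
 * stored highest power first; an N-block tail loads its first power from
 * (256 - 16*N)(%r10), so 240(%r10) is H^1, 224(%r10) is the ymm pair
 * H^2:H^1, and vinserti64x2 $2,240(%r10) stitches H^1 onto a pair to
 * cover an odd count.
 */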
98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nkpoxiswyhgqlsf: orq %r8,%r8 je .L_after_reduction_nkpoxiswyhgqlsf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nkpoxiswyhgqlsf: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_16_CpuqkplkrGqAlEE: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_dhDlEwplftmrFtf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_dhDlEwplftmrFtf .L_16_blocks_overflow_dhDlEwplftmrFtf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_dhDlEwplftmrFtf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
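/*
 * Annotation: unlike the 1..15-block handlers, the 16-block tail below
 * has no cmpq $16 split and falls straight into its
 * _small_initial_partial_block_ sequence.  This appears deliberate: a
 * 16-block tail can only arise with a partial final block, since an
 * exact 256-byte multiple leaves zero residual bytes and is dispatched
 * to _last_num_blocks_is_0_ instead.
 */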
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_iuDhkykBcvvzBFb: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iuDhkykBcvvzBFb: vpxorq %xmm7,%xmm14,%xmm14 
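/*
 * Annotation: the _last_num_blocks_is_0_ path below does no AES work at
 * all.  It folds the four ciphertext vectors saved at 768..960(%rsp)
 * into the hash (the accumulator %zmm14 is XORed into the first one)
 * against the key powers indexed at (%rsp,%rbx,1), then reduces the
 * result modulo the GHASH polynomial via POLY2.
 */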
.L_after_reduction_iuDhkykBcvvzBFb: jmp .L_last_blocks_done_CpuqkplkrGqAlEE .L_last_num_blocks_is_0_CpuqkplkrGqAlEE: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_CpuqkplkrGqAlEE: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_ralurfzeatcGxDF .L_encrypt_16_blocks_ralurfzeatcGxDF: cmpb $240,%r15b jae .L_16_blocks_overflow_FGbkcFatDxoofCE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_FGbkcFatDxoofCE .L_16_blocks_overflow_FGbkcFatDxoofCE: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_FGbkcFatDxoofCE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
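/*
 * Annotation: this is the steady-state 16-blocks-per-iteration encrypt
 * path.  The overflow guard is cmpb $240,%r15b with addb $16,%r15b
 * maintaining the running low counter byte, the AES rounds for the next
 * 16 blocks are interleaved with the GHASH of the previous 16, and the
 * fresh ciphertext is byte-reflected and parked at 1280..1472(%rsp) to
 * be hashed on the following pass.
 */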
98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%rax movl %r8d,%r10d addl $15,%r10d 
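/*
 * Annotation: the branch ladder below computes %r10d = ceil(bytes/16)
 * (the addl $15 above, then shrl $4) and binary-searches it into one of
 * the seventeen tail handlers, _last_num_blocks_is_0_ through _16_.
 */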
shrl $4,%r10d je .L_last_num_blocks_is_0_FesvdmtDyerGEdv cmpl $8,%r10d je .L_last_num_blocks_is_8_FesvdmtDyerGEdv jb .L_last_num_blocks_is_7_1_FesvdmtDyerGEdv cmpl $12,%r10d je .L_last_num_blocks_is_12_FesvdmtDyerGEdv jb .L_last_num_blocks_is_11_9_FesvdmtDyerGEdv cmpl $15,%r10d je .L_last_num_blocks_is_15_FesvdmtDyerGEdv ja .L_last_num_blocks_is_16_FesvdmtDyerGEdv cmpl $14,%r10d je .L_last_num_blocks_is_14_FesvdmtDyerGEdv jmp .L_last_num_blocks_is_13_FesvdmtDyerGEdv .L_last_num_blocks_is_11_9_FesvdmtDyerGEdv: cmpl $10,%r10d je .L_last_num_blocks_is_10_FesvdmtDyerGEdv ja .L_last_num_blocks_is_11_FesvdmtDyerGEdv jmp .L_last_num_blocks_is_9_FesvdmtDyerGEdv .L_last_num_blocks_is_7_1_FesvdmtDyerGEdv: cmpl $4,%r10d je .L_last_num_blocks_is_4_FesvdmtDyerGEdv jb .L_last_num_blocks_is_3_1_FesvdmtDyerGEdv cmpl $6,%r10d ja .L_last_num_blocks_is_7_FesvdmtDyerGEdv je .L_last_num_blocks_is_6_FesvdmtDyerGEdv jmp .L_last_num_blocks_is_5_FesvdmtDyerGEdv .L_last_num_blocks_is_3_1_FesvdmtDyerGEdv: cmpl $2,%r10d ja .L_last_num_blocks_is_3_FesvdmtDyerGEdv je .L_last_num_blocks_is_2_FesvdmtDyerGEdv .L_last_num_blocks_is_1_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_cmjbanhfxFrrojy vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_cmjbanhfxFrrojy .L_16_blocks_overflow_cmjbanhfxFrrojy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_cmjbanhfxFrrojy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,8,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 
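/*
 * Annotation: the single-block tail narrows the counter work to xmm
 * width: one vpaddd %xmm28 increment, one masked vmovdqu8 ...{%k1}{z}
 * load that zero-fills past the tail, and an xmm-width vaesenc chain,
 * while the deferred GHASH of the previous 16 blocks still runs at full
 * zmm width alongside it.
 */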
vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_EGeAwrlgtsiFljf subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EGeAwrlgtsiFljf .L_small_initial_partial_block_EGeAwrlgtsiFljf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_EGeAwrlgtsiFljf .L_small_initial_compute_done_EGeAwrlgtsiFljf: .L_after_reduction_EGeAwrlgtsiFljf: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_2_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_EgjyoropybwcGcn vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_EgjyoropybwcGcn .L_16_blocks_overflow_EgjyoropybwcGcn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_EgjyoropybwcGcn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 
98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,40,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rrppsiDyiwwbqbf subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rrppsiDyiwwbqbf .L_small_initial_partial_block_rrppsiDyiwwbqbf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rrppsiDyiwwbqbf: orq %r8,%r8 je .L_after_reduction_rrppsiDyiwwbqbf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rrppsiDyiwwbqbf: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_3_FesvdmtDyerGEdv: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_wGGmGvscmpGfnny vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_wGGmGvscmpGfnny .L_16_blocks_overflow_wGGmGvscmpGfnny: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_wGGmGvscmpGfnny: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pFvDrkCwqwAamnn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 
vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pFvDrkCwqwAamnn .L_small_initial_partial_block_pFvDrkCwqwAamnn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pFvDrkCwqwAamnn: orq %r8,%r8 je .L_after_reduction_pFvDrkCwqwAamnn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pFvDrkCwqwAamnn: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_4_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_lnowafuogaacgct vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_lnowafuogaacgct .L_16_blocks_overflow_lnowafuogaacgct: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_lnowafuogaacgct: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq 
$0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yqgqaEocfqiFkDi subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yqgqaEocfqiFkDi .L_small_initial_partial_block_yqgqaEocfqiFkDi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yqgqaEocfqiFkDi: orq %r8,%r8 je .L_after_reduction_yqgqaEocfqiFkDi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yqgqaEocfqiFkDi: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_5_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_trmgpGgtzmsExiu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_trmgpGgtzmsExiu 
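/*
 * Annotation: from five blocks up the counters no longer fit in one
 * vector, so the handler below pairs a full zmm increment with a
 * narrower one (vpaddd %xmm27 for the fifth block) and indexes
 * byte64_len_to_mask_table only after subtracting the 64 bytes already
 * covered by the first vector.
 */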
.L_16_blocks_overflow_trmgpGgtzmsExiu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_trmgpGgtzmsExiu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * 
(5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vuyopzdEphdnacq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vuyopzdEphdnacq .L_small_initial_partial_block_vuyopzdEphdnacq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vuyopzdEphdnacq: orq %r8,%r8 je .L_after_reduction_vuyopzdEphdnacq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vuyopzdEphdnacq: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_6_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_FwaeBcDAewBtpAB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_FwaeBcDAewBtpAB .L_16_blocks_overflow_FwaeBcDAewBtpAB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_FwaeBcDAewBtpAB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 
vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rdtAwwiDCCqmaAa subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rdtAwwiDCCqmaAa .L_small_initial_partial_block_rdtAwwiDCCqmaAa: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rdtAwwiDCCqmaAa: orq %r8,%r8 je .L_after_reduction_rdtAwwiDCCqmaAa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rdtAwwiDCCqmaAa: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_7_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_AnyscuqxAspkzsl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_AnyscuqxAspkzsl .L_16_blocks_overflow_AnyscuqxAspkzsl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_AnyscuqxAspkzsl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_digiiCypcjzldxx subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_digiiCypcjzldxx .L_small_initial_partial_block_digiiCypcjzldxx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 
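// The vextracti64x4/vextracti32x4 + vpxorq sequence below folds the four
// 128-bit lanes of the GHASH high/low partial products down to a single xmm
// value; the POLY2 carry-less multiplies that follow reduce it modulo the
// GHASH polynomial.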
vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_digiiCypcjzldxx: orq %r8,%r8 je .L_after_reduction_digiiCypcjzldxx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_digiiCypcjzldxx: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_8_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_cgqpkbbBmprdEnv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_cgqpkbbBmprdEnv .L_16_blocks_overflow_cgqpkbbBmprdEnv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_cgqpkbbBmprdEnv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 
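// The .byte 98,...,220,... runs decode to EVEX vaesenc on the counter
// blocks; they stay interleaved with this horizontal fold so the AES and
// vpclmulqdq (.byte 98,...,68,...) pipelines overlap.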
vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sEakaptGjtmocyA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sEakaptGjtmocyA .L_small_initial_partial_block_sEakaptGjtmocyA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sEakaptGjtmocyA: orq %r8,%r8 je .L_after_reduction_sEakaptGjtmocyA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sEakaptGjtmocyA: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_9_FesvdmtDyerGEdv: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_ovcajrDEfpdjwcF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_ovcajrDEfpdjwcF .L_16_blocks_overflow_ovcajrDEfpdjwcF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_ovcajrDEfpdjwcF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 
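// Round keys sit at 0(%rdi)..224(%rdi): 15 keys, i.e. an AES-256 schedule.
// 208(%rdi) below is the round-13 key; 224(%rdi) is the last-round key,
// consumed by vaesenclast (the .byte 98,...,221,... forms).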
vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wwoArvEqahCsDin subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wwoArvEqahCsDin .L_small_initial_partial_block_wwoArvEqahCsDin: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wwoArvEqahCsDin: orq %r8,%r8 je .L_after_reduction_wwoArvEqahCsDin vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wwoArvEqahCsDin: jmp 
.L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_10_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_xyisBwjDghCtkcq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_xyisBwjDghCtkcq .L_16_blocks_overflow_xyisBwjDghCtkcq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_xyisBwjDghCtkcq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 
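// The remaining vaesenc rounds and the closing vaesenclast below finish the
// keystream; it is XORed into the loaded input and stored back, with the
// sub-64-byte tail handled through the %k1 mask taken from
// byte64_len_to_mask_table.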
.byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_maGzmchmgBAsGGp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_maGzmchmgBAsGGp .L_small_initial_partial_block_maGzmchmgBAsGGp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_maGzmchmgBAsGGp: orq %r8,%r8 je .L_after_reduction_maGzmchmgBAsGGp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_maGzmchmgBAsGGp: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_11_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_oCaueqhtnkiqikA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_oCaueqhtnkiqikA .L_16_blocks_overflow_oCaueqhtnkiqikA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_oCaueqhtnkiqikA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq 
%ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rwuhidithmAtnfF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rwuhidithmAtnfF .L_small_initial_partial_block_rwuhidithmAtnfF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rwuhidithmAtnfF: orq %r8,%r8 je .L_after_reduction_rwuhidithmAtnfF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rwuhidithmAtnfF: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_12_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_xwjsvxAnBhmckaz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_xwjsvxAnBhmckaz .L_16_blocks_overflow_xwjsvxAnBhmckaz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_xwjsvxAnBhmckaz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_glqGCCyiublvFga subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_glqGCCyiublvFga .L_small_initial_partial_block_glqGCCyiublvFga: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 
98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_glqGCCyiublvFga: orq %r8,%r8 je .L_after_reduction_glqGCCyiublvFga vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_glqGCCyiublvFga: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_13_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_jfgktdduAaBgqFv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_jfgktdduAaBgqFv .L_16_blocks_overflow_jfgktdduAaBgqFv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_jfgktdduAaBgqFv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_deedxboGavqljAa subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_deedxboGavqljAa .L_small_initial_partial_block_deedxboGavqljAa: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_deedxboGavqljAa: orq %r8,%r8 je .L_after_reduction_deedxboGavqljAa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_deedxboGavqljAa: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_14_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_xdtrxodfgwcifbm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_xdtrxodfgwcifbm .L_16_blocks_overflow_xdtrxodfgwcifbm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_xdtrxodfgwcifbm: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_lhfnbffaAGncxjA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lhfnbffaAGncxjA .L_small_initial_partial_block_lhfnbffaAGncxjA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lhfnbffaAGncxjA: orq %r8,%r8 je .L_after_reduction_lhfnbffaAGncxjA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lhfnbffaAGncxjA: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_15_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_FrBtEqtdGyajfFu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd 
%zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_FrBtEqtdGyajfFu .L_16_blocks_overflow_FrBtEqtdGyajfFu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_FrBtEqtdGyajfFu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 
.byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DlintgAmylyraad subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DlintgAmylyraad .L_small_initial_partial_block_DlintgAmylyraad: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq 
$0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DlintgAmylyraad: orq %r8,%r8 je .L_after_reduction_DlintgAmylyraad vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DlintgAmylyraad: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_16_FesvdmtDyerGEdv: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_ofhxurlakbuiiab vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ofhxurlakbuiiab .L_16_blocks_overflow_ofhxurlakbuiiab: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ofhxurlakbuiiab: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 
112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_niAfluBnEgrukbj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 
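// What follows looks like the standard GHASH tail: the partial products are
// combined Karatsuba-style (vpsrldq/vpslldq $8 split the middle terms), the
// four 128-bit lanes are XOR-folded down to a single lane, and the 256-bit
// result is reduced modulo the GHASH polynomial with two vpclmulqdq steps
// against the POLY2 constant. Note that vpternlogq with immediate 0x96
// computes a three-way XOR: dst = dst ^ src1 ^ src2.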
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_niAfluBnEgrukbj: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_niAfluBnEgrukbj: jmp .L_last_blocks_done_FesvdmtDyerGEdv .L_last_num_blocks_is_0_FesvdmtDyerGEdv: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_FesvdmtDyerGEdv: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_ralurfzeatcGxDF .L_message_below_32_blocks_ralurfzeatcGxDF: subq $256,%r8 addq $256,%rax movl %r8d,%r10d leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_BiAvfDwrflaDzBx vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq 
%zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_BiAvfDwrflaDzBx: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_pnzuldcucuyingq cmpl $8,%r10d je .L_last_num_blocks_is_8_pnzuldcucuyingq jb .L_last_num_blocks_is_7_1_pnzuldcucuyingq cmpl $12,%r10d je .L_last_num_blocks_is_12_pnzuldcucuyingq jb .L_last_num_blocks_is_11_9_pnzuldcucuyingq cmpl $15,%r10d je .L_last_num_blocks_is_15_pnzuldcucuyingq ja .L_last_num_blocks_is_16_pnzuldcucuyingq cmpl $14,%r10d je .L_last_num_blocks_is_14_pnzuldcucuyingq jmp .L_last_num_blocks_is_13_pnzuldcucuyingq .L_last_num_blocks_is_11_9_pnzuldcucuyingq: cmpl $10,%r10d je .L_last_num_blocks_is_10_pnzuldcucuyingq ja .L_last_num_blocks_is_11_pnzuldcucuyingq jmp .L_last_num_blocks_is_9_pnzuldcucuyingq .L_last_num_blocks_is_7_1_pnzuldcucuyingq: cmpl $4,%r10d je .L_last_num_blocks_is_4_pnzuldcucuyingq jb .L_last_num_blocks_is_3_1_pnzuldcucuyingq cmpl $6,%r10d ja .L_last_num_blocks_is_7_pnzuldcucuyingq je .L_last_num_blocks_is_6_pnzuldcucuyingq jmp .L_last_num_blocks_is_5_pnzuldcucuyingq .L_last_num_blocks_is_3_1_pnzuldcucuyingq: cmpl $2,%r10d ja .L_last_num_blocks_is_3_pnzuldcucuyingq je .L_last_num_blocks_is_2_pnzuldcucuyingq .L_last_num_blocks_is_1_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_zDsbocmrpEvnicC vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_zDsbocmrpEvnicC .L_16_blocks_overflow_zDsbocmrpEvnicC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_zDsbocmrpEvnicC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 
98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_GkBxoqrqufclksk subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GkBxoqrqufclksk .L_small_initial_partial_block_GkBxoqrqufclksk: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_GkBxoqrqufclksk .L_small_initial_compute_done_GkBxoqrqufclksk: .L_after_reduction_GkBxoqrqufclksk: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_2_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_fkqtFBuohiwoapu vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_fkqtFBuohiwoapu .L_16_blocks_overflow_fkqtFBuohiwoapu: vpshufb 
%zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_fkqtFBuohiwoapu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vxviotokbwbgyEt subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vxviotokbwbgyEt .L_small_initial_partial_block_vxviotokbwbgyEt: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq 
%zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vxviotokbwbgyEt: orq %r8,%r8 je .L_after_reduction_vxviotokbwbgyEt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vxviotokbwbgyEt: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_3_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_myfxreEhmAEiFvd vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_myfxreEhmAEiFvd .L_16_blocks_overflow_myfxreEhmAEiFvd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_myfxreEhmAEiFvd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cvvlAqBdybFdjiy subq $16,%r8 movl $0,(%rdx) vmovdqu64 
208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cvvlAqBdybFdjiy .L_small_initial_partial_block_cvvlAqBdybFdjiy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cvvlAqBdybFdjiy: orq %r8,%r8 je .L_after_reduction_cvvlAqBdybFdjiy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cvvlAqBdybFdjiy: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_4_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_EshcbGrbbBjGmFs vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_EshcbGrbbBjGmFs .L_16_blocks_overflow_EshcbGrbbBjGmFs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_EshcbGrbbBjGmFs: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 
96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GfeakfatCkpGtjm subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GfeakfatCkpGtjm .L_small_initial_partial_block_GfeakfatCkpGtjm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GfeakfatCkpGtjm: orq %r8,%r8 je .L_after_reduction_GfeakfatCkpGtjm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GfeakfatCkpGtjm: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_5_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_rBzncCcAACDmBwu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_rBzncCcAACDmBwu .L_16_blocks_overflow_rBzncCcAACDmBwu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 
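// Counter-overflow path (hedged interpretation): when the low byte of the
// counter is close to wrapping (the cmpl/jae guard above), the counters are
// byte-swapped via vpshufb with the mask in %zmm29 (presumably a byte-reversal
// mask), incremented with full 32-bit adds using the ddq_add_1234 /
// ddq_add_4444 constants, and swapped back. The fast path instead adds
// pre-swapped increments (%zmm28 / %zmm27) directly.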
vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_rBzncCcAACDmBwu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AstwCzCrFBsuGAb subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AstwCzCrFBsuGAb .L_small_initial_partial_block_AstwCzCrFBsuGAb: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AstwCzCrFBsuGAb: orq %r8,%r8 je .L_after_reduction_AstwCzCrFBsuGAb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AstwCzCrFBsuGAb: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_6_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_yghnlDweoeGyiyD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_yghnlDweoeGyiyD .L_16_blocks_overflow_yghnlDweoeGyiyD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_yghnlDweoeGyiyD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 
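// The .byte sequences throughout this file appear to be hand-assembled EVEX
// instructions: runs ending in opcode 0xDC/0xDD (220/221) decode as
// vaesenc/vaesenclast and runs containing opcode 0x44 (68) as vpclmulqdq on
// ZMM registers, presumably emitted as raw bytes so the file still assembles
// with toolchains whose assemblers lack the VAES/VPCLMULQDQ mnemonics.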
vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nAqArzgnghAposf subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nAqArzgnghAposf .L_small_initial_partial_block_nAqArzgnghAposf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nAqArzgnghAposf: orq %r8,%r8 je .L_after_reduction_nAqArzgnghAposf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nAqArzgnghAposf: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_7_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_stoalvbzsyrkrBC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_stoalvbzsyrkrBC .L_16_blocks_overflow_stoalvbzsyrkrBC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_stoalvbzsyrkrBC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_tvAfmkadqFgykwd subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tvAfmkadqFgykwd .L_small_initial_partial_block_tvAfmkadqFgykwd: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tvAfmkadqFgykwd: orq %r8,%r8 je .L_after_reduction_tvAfmkadqFgykwd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tvAfmkadqFgykwd: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_8_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_miFDzcCBFGrssiv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_miFDzcCBFGrssiv .L_16_blocks_overflow_miFDzcCBFGrssiv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_miFDzcCBFGrssiv: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dnvdvgGCEkvixhc subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dnvdvgGCEkvixhc .L_small_initial_partial_block_dnvdvgGCEkvixhc: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 
98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dnvdvgGCEkvixhc: orq %r8,%r8 je .L_after_reduction_dnvdvgGCEkvixhc vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dnvdvgGCEkvixhc: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_9_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_lkCdskAdsidpkuw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_lkCdskAdsidpkuw .L_16_blocks_overflow_lkCdskAdsidpkuw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_lkCdskAdsidpkuw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BvuayrqCbqotfzl subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BvuayrqCbqotfzl .L_small_initial_partial_block_BvuayrqCbqotfzl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq 
$8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BvuayrqCbqotfzl: orq %r8,%r8 je .L_after_reduction_BvuayrqCbqotfzl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BvuayrqCbqotfzl: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_10_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_hktAeBlvDcCnios vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_hktAeBlvDcCnios .L_16_blocks_overflow_hktAeBlvDcCnios: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_hktAeBlvDcCnios: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qDkapAwwDbttzcj subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qDkapAwwDbttzcj .L_small_initial_partial_block_qDkapAwwDbttzcj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qDkapAwwDbttzcj: orq %r8,%r8 je .L_after_reduction_qDkapAwwDbttzcj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qDkapAwwDbttzcj: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_11_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_bblFcfwEgdzswCm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_bblFcfwEgdzswCm .L_16_blocks_overflow_bblFcfwEgdzswCm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_bblFcfwEgdzswCm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 
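// Note: the .byte runs in these stanzas start with 0x62 (the EVEX prefix)
// and carry opcodes 0xDC/0xDD (vaesenc/vaesenclast) and 0x44 (vpclmulqdq),
// i.e. they are VAES/VPCLMULQDQ instructions emitted as raw bytes,
// evidently so the file still assembles with older toolchains. Each AES
// round on the counter blocks is interleaved with GHASH multiplies on the
// stashed hash-key powers to keep both execution pipelines busy.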
.byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hxkcdbddneddmzb subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hxkcdbddneddmzb .L_small_initial_partial_block_hxkcdbddneddmzb: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hxkcdbddneddmzb: orq %r8,%r8 je .L_after_reduction_hxkcdbddneddmzb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hxkcdbddneddmzb: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_12_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_qmmgmehghErCGvF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_qmmgmehghErCGvF .L_16_blocks_overflow_qmmgmehghErCGvF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_qmmgmehghErCGvF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 
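// Round keys are broadcast from 16-byte slots at offsets 0..224 of the
// expanded key schedule (15 keys in total, consistent with AES-256:
// 14 rounds plus the initial whitening key).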
.byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bGACCFiDoxkcuwq subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bGACCFiDoxkcuwq .L_small_initial_partial_block_bGACCFiDoxkcuwq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bGACCFiDoxkcuwq: orq %r8,%r8 je .L_after_reduction_bGACCFiDoxkcuwq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bGACCFiDoxkcuwq: jmp .L_last_blocks_done_pnzuldcucuyingq 
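// Each .L_last_num_blocks_is_N stanza below follows the same template:
// derive N counter blocks (falling back to a byte-swapped add when the
// low counter byte is about to wrap), AES-encrypt them, mask the final
// partial block with a mask fetched from byte64_len_to_mask_table, and
// fold the byte-shuffled ciphertext into the GHASH accumulator in %xmm14.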
.L_last_num_blocks_is_13_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_dulzkutdgjakGvB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_dulzkutdgjakGvB .L_16_blocks_overflow_dulzkutdgjakGvB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_dulzkutdgjakGvB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dmbcxBEdtigsClF subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dmbcxBEdtigsClF .L_small_initial_partial_block_dmbcxBEdtigsClF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 
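// Horizontal fold: the 512-bit high/low GHASH products are XOR-reduced
// 512 -> 256 -> 128 bits here, ahead of the final POLY2 reduction.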
vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dmbcxBEdtigsClF: orq %r8,%r8 je .L_after_reduction_dmbcxBEdtigsClF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dmbcxBEdtigsClF: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_14_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_nntbrGkellunBas vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_nntbrGkellunBas .L_16_blocks_overflow_nntbrGkellunBas: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_nntbrGkellunBas: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aopDguzqabquECi subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aopDguzqabquECi .L_small_initial_partial_block_aopDguzqabquECi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 
98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aopDguzqabquECi: orq %r8,%r8 je .L_after_reduction_aopDguzqabquECi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aopDguzqabquECi: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_15_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_gqGDtzmCceFkfal vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_gqGDtzmCceFkfal .L_16_blocks_overflow_gqGDtzmCceFkfal: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_gqGDtzmCceFkfal: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kkvugeyiFsBldFy subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq 
$4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kkvugeyiFsBldFy .L_small_initial_partial_block_kkvugeyiFsBldFy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kkvugeyiFsBldFy: orq %r8,%r8 je .L_after_reduction_kkvugeyiFsBldFy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kkvugeyiFsBldFy: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_16_pnzuldcucuyingq: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_nnArmAxpgvlqCpA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_nnArmAxpgvlqCpA .L_16_blocks_overflow_nnArmAxpgvlqCpA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_nnArmAxpgvlqCpA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 
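// 16-block tail: all four zmm groups carry counter blocks, so only the
// last 64-byte group of the load/store pair is masked through %k1.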
vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_uqdvluxFgGqdFqv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 
98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uqdvluxFgGqdFqv: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uqdvluxFgGqdFqv: jmp .L_last_blocks_done_pnzuldcucuyingq .L_last_num_blocks_is_0_pnzuldcucuyingq: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_pnzuldcucuyingq: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_ralurfzeatcGxDF .L_message_below_equal_16_blocks_ralurfzeatcGxDF: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_hdjaAabmubhzgrE jl .L_small_initial_num_blocks_is_7_1_hdjaAabmubhzgrE cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_hdjaAabmubhzgrE jl .L_small_initial_num_blocks_is_11_9_hdjaAabmubhzgrE cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_hdjaAabmubhzgrE cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_hdjaAabmubhzgrE cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_hdjaAabmubhzgrE jmp .L_small_initial_num_blocks_is_13_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_11_9_hdjaAabmubhzgrE: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_hdjaAabmubhzgrE cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_hdjaAabmubhzgrE jmp .L_small_initial_num_blocks_is_9_hdjaAabmubhzgrE 
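// Messages of at most 16 blocks are dispatched on %r12 = ceil(len/16)
// through a small compare-and-branch tree (split at 8, then at 4 and 12)
// rather than a jump table.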
.L_small_initial_num_blocks_is_7_1_hdjaAabmubhzgrE: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_hdjaAabmubhzgrE jl .L_small_initial_num_blocks_is_3_1_hdjaAabmubhzgrE cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_hdjaAabmubhzgrE cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_hdjaAabmubhzgrE jmp .L_small_initial_num_blocks_is_5_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_3_1_hdjaAabmubhzgrE: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_hdjaAabmubhzgrE cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_1_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONEa(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%rax,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,8,221,199 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_oglzypDCtpAhyGa subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_oglzypDCtpAhyGa .L_small_initial_partial_block_oglzypDCtpAhyGa: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_oglzypDCtpAhyGa .L_small_initial_compute_done_oglzypDCtpAhyGa: .L_after_reduction_oglzypDCtpAhyGa: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_2_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%rax,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 
98,210,125,40,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,40,221,199 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mwbBGGvalpfhfnw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mwbBGGvalpfhfnw .L_small_initial_partial_block_mwbBGGvalpfhfnw: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mwbBGGvalpfhfnw: orq %r8,%r8 je .L_after_reduction_mwbBGGvalpfhfnw vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mwbBGGvalpfhfnw: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_3_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 
98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_njdmEDjqDqutzfl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_njdmEDjqDqutzfl .L_small_initial_partial_block_njdmEDjqDqutzfl: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_njdmEDjqDqutzfl: orq %r8,%r8 je .L_after_reduction_njdmEDjqDqutzfl vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_njdmEDjqDqutzfl: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_4_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 
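// Each round above broadcasts one 128-bit round key to every lane of %zmm15
// and applies it with an EVEX-coded VAESENC emitted as raw bytes (.byte 98,...
// where 98 = 0x62 is the EVEX prefix, 220 = 0xdc VAESENC, 221 = 0xdd
// VAESENCLAST), presumably so assemblers without VAES support can still build
// this file. Key offsets 0 through 224 cover 15 round keys, which matches an
// AES-256 schedule at (%rdi).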
vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EsFwhCqwxAhrvFa subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EsFwhCqwxAhrvFa .L_small_initial_partial_block_EsFwhCqwxAhrvFa: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EsFwhCqwxAhrvFa: orq %r8,%r8 je .L_after_reduction_EsFwhCqwxAhrvFa vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_EsFwhCqwxAhrvFa: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_5_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 
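// Five-block tail: four blocks travel in %zmm0 and the fifth in %xmm3, so two
// cipher streams are interleaved per round below. %k1 was loaded from
// byte64_len_to_mask_table indexed by the residual length (%r15 = %r8 - 64),
// giving a byte mask that appears to confine the masked load/store
// (vmovdqu8 ...{%k1}{z}) to the valid part of the last 64-byte chunk.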
vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,8,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aiubnzDkbAjBaGt subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aiubnzDkbAjBaGt .L_small_initial_partial_block_aiubnzDkbAjBaGt: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aiubnzDkbAjBaGt: orq %r8,%r8 je .L_after_reduction_aiubnzDkbAjBaGt vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_aiubnzDkbAjBaGt: jmp 
.L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_6_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,40,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GodgzzxioGrdAeg subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GodgzzxioGrdAeg .L_small_initial_partial_block_GodgzzxioGrdAeg: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 
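// GHASH folding for the tail: %r10 points at the precomputed power table
// (leaq 80(%rsi)), where offset 240 appears to hold H itself and lower offsets
// hold increasing powers, so an n-block tail multiplies its blocks against
// H^n..H^1 before the final reduction. The carry-less multiplies are again
// byte-coded (opcode 0x44 = VPCLMULQDQ) with immediates 0x00/0x01/0x10/0x11
// selecting the low/high qword halves, i.e. the four partial products of a
// 128x128-bit multiply.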
.byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GodgzzxioGrdAeg: orq %r8,%r8 je .L_after_reduction_GodgzzxioGrdAeg vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_GodgzzxioGrdAeg: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_7_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jmqmzBeujCAjAxl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq 
%zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jmqmzBeujCAjAxl .L_small_initial_partial_block_jmqmzBeujCAjAxl: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jmqmzBeujCAjAxl: orq %r8,%r8 je .L_after_reduction_jmqmzBeujCAjAxl vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jmqmzBeujCAjAxl: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_8_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 
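// The eight-block tail is the widest case that still fits in two zmm cipher
// streams (%zmm0 and %zmm3, four counter blocks each); the 9..16-block paths
// below add a third (%zmm4) and fourth (%zmm5) stream. The per-round pattern
// is identical to the smaller cases: broadcast the round key, then one
// byte-coded VAESENC per stream.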
vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lGwzbjEigiuyrxp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lGwzbjEigiuyrxp .L_small_initial_partial_block_lGwzbjEigiuyrxp: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lGwzbjEigiuyrxp: orq %r8,%r8 je .L_after_reduction_lGwzbjEigiuyrxp vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_lGwzbjEigiuyrxp: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_9_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 
64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,8,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wtbpkxoFvlcvhkk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_wtbpkxoFvlcvhkk .L_small_initial_partial_block_wtbpkxoFvlcvhkk: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wtbpkxoFvlcvhkk: orq %r8,%r8 je .L_after_reduction_wtbpkxoFvlcvhkk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_wtbpkxoFvlcvhkk: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_10_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 224(%rdi),%zmm15 .byte 
98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,40,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zDgaEerElzafAjF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zDgaEerElzafAjF .L_small_initial_partial_block_zDgaEerElzafAjF: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zDgaEerElzafAjF: orq %r8,%r8 je .L_after_reduction_zDgaEerElzafAjF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_zDgaEerElzafAjF: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_11_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BrkzfboGqlhyAvb subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq 
%xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BrkzfboGqlhyAvb .L_small_initial_partial_block_BrkzfboGqlhyAvb: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BrkzfboGqlhyAvb: orq %r8,%r8 je .L_after_reduction_BrkzfboGqlhyAvb vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_BrkzfboGqlhyAvb: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_12_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 
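// Reduction pattern, repeated after every tail multiply in this section: the
// 256-bit product is split with vpsrldq/vpslldq $8, horizontally XOR-folded
// from zmm to ymm to xmm via vextracti64x4/vextracti32x4, then reduced modulo
// the GHASH polynomial g(x) = x^128 + x^7 + x^2 + x + 1 using two carry-less
// multiplies by the POLY2 constant. vpternlogq with immediate 0x96 (the truth
// table of a three-input XOR) merges the final three terms into the
// accumulator %xmm14 in one instruction.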
vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wBuxadGqDBDeard subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wBuxadGqDBDeard .L_small_initial_partial_block_wBuxadGqDBDeard: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq 
%xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wBuxadGqDBDeard: orq %r8,%r8 je .L_after_reduction_wBuxadGqDBDeard vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_wBuxadGqDBDeard: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_13_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,8,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb 
%zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_koBzdarsEboqwan subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_koBzdarsEboqwan .L_small_initial_partial_block_koBzdarsEboqwan: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_koBzdarsEboqwan: orq %r8,%r8 je .L_after_reduction_koBzdarsEboqwan vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_koBzdarsEboqwan: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_14_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq 
%r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,40,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EhgwDyGvdzvgvtp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 
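// Expressions such as subq $16 * (14 - 1),%r8 are evaluated by the assembler;
// they subtract the bytes covered by all but the last block, so the following
// cmpq $16 / jl picks between the complete-block and partial-block GHASH
// variants. The partial branch appears to record the leftover byte count at
// (%rdx) and stash what looks like the last ciphertext block at 16(%rsi) for
// a later call, and the orq %r8,%r8 / je pair runs the trailing vpxorq of
// %xmm13 into %xmm14 only when bytes remain.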
vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EhgwDyGvdzvgvtp .L_small_initial_partial_block_EhgwDyGvdzvgvtp: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EhgwDyGvdzvgvtp: orq %r8,%r8 je .L_after_reduction_EhgwDyGvdzvgvtp vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_EhgwDyGvdzvgvtp: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_15_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 
98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dguvDqtayFqucCq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dguvDqtayFqucCq .L_small_initial_partial_block_dguvDqtayFqucCq: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dguvDqtayFqucCq: orq %r8,%r8 je .L_after_reduction_dguvDqtayFqucCq vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dguvDqtayFqucCq: jmp .L_small_initial_blocks_encrypted_hdjaAabmubhzgrE .L_small_initial_num_blocks_is_16_hdjaAabmubhzgrE: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 
98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_mlladecCGcaEame: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mlladecCGcaEame: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mlladecCGcaEame: 
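// Every 1..16-block tail path of the encrypt routine converges below: the
// next counter block (%xmm2) is written back to the context, the GHASH state
// (%xmm14) is returned to byte order with SHUF_MASK and stored at 64(%rsi),
// the hash-key powers cached on the stack for long messages are scrubbed,
// and the callee-saved registers are popped before returning.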
.L_small_initial_blocks_encrypted_hdjaAabmubhzgrE: .L_ghash_done_ralurfzeatcGxDF: vmovdqu64 %xmm2,0(%rsi) .L_enc_dec_done_ralurfzeatcGxDF: vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_abort_ralurfzeatcGxDF: jmp .Lexit_gcm_encrypt .Lexit_gcm_encrypt: cmpq $256,%r8 jbe .Lskip_hkeys_cleanup_cccrurCdlggtEnk vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_cccrurCdlggtEnk: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .byte 0xf3,0xc3 .Lencrypt_seh_end: .cfi_endproc .size aes_gcm_encrypt_avx512, .-aes_gcm_encrypt_avx512 .globl aes_gcm_decrypt_avx512 .hidden aes_gcm_decrypt_avx512 .type aes_gcm_decrypt_avx512,@function .align 32 aes_gcm_decrypt_avx512: .cfi_startproc .Ldecrypt_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Ldecrypt_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Ldecrypt_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Ldecrypt_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Ldecrypt_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Ldecrypt_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Ldecrypt_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Ldecrypt_seh_setfp: .Ldecrypt_seh_prolog_end: subq $1588,%rsp andq $(-64),%rsp movl 240(%rdi),%eax cmpl $9,%eax je .Laes_gcm_decrypt_128_avx512 cmpl $11,%eax je .Laes_gcm_decrypt_192_avx512 cmpl $13,%eax je .Laes_gcm_decrypt_256_avx512 xorl %eax,%eax jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_128_avx512: orq %r8,%r8 je .L_enc_dec_abort_icBhFhCkojGgnBc xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 movl (%rdx),%eax orq %rax,%rax je .L_partial_block_done_Cwuafefseqcgife movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 leaq 80(%rsi),%r10 vmovdqu64 240(%r10),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %rax,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%rax,1),%r13 subq $16,%r13 jge .L_no_extra_mask_Cwuafefseqcgife subq %r13,%r12 .L_no_extra_mask_Cwuafefseqcgife: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_Cwuafefseqcgife .byte 98,243,13,8,68,252,17 .byte 98,115,13,8,68,212,0 .byte 98,115,13,8,68,220,1 .byte 98,115,13,8,68,244,16 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 .byte 98,83,37,8,68,214,1 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 .byte 98,83,37,8,68,214,0 vpsrldq $4,%xmm10,%xmm10 .byte 98,83,37,8,68,246,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movl $0,(%rdx) movq %rax,%r12 movq $16,%rax subq %r12,%rax jmp .L_enc_dec_done_Cwuafefseqcgife .L_partial_incomplete_Cwuafefseqcgife: addl %r8d,(%rdx) movq %r8,%rax .L_enc_dec_done_Cwuafefseqcgife: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%rax,2),%k1 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_Cwuafefseqcgife: vmovdqu64 0(%rsi),%xmm2 subq %rax,%r8 je .L_enc_dec_done_icBhFhCkojGgnBc cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_icBhFhCkojGgnBc vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_DkBvliAEspzoabf vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_DkBvliAEspzoabf .L_next_16_overflow_DkBvliAEspzoabf: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_DkBvliAEspzoabf: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%rax,1),%zmm0 vmovdqu8 64(%rcx,%rax,1),%zmm3 vmovdqu8 128(%rcx,%rax,1),%zmm4 vmovdqu8 192(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%rax,1) vmovdqu8 %zmm10,64(%r10,%rax,1) vmovdqu8 %zmm11,128(%r10,%rax,1) vmovdqu8 %zmm12,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) leaq 80(%rsi),%r12 testq %r14,%r14 jnz 
.L_skip_hkeys_precomputation_yDAnEECuuGxfwvr vmovdqu64 192(%r12),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 128(%r12),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 64(%r12),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 0(%r12),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_yDAnEECuuGxfwvr: cmpq $512,%r8 jb .L_message_below_32_blocks_icBhFhCkojGgnBc cmpb $240,%r15b jae .L_next_16_overflow_lgmzdneusufrFmr vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_lgmzdneusufrFmr .L_next_16_overflow_lgmzdneusufrFmr: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_lgmzdneusufrFmr: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%rax,1),%zmm0 vmovdqu8 320(%rcx,%rax,1),%zmm3 vmovdqu8 384(%rcx,%rax,1),%zmm4 vmovdqu8 448(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%rax,1) vmovdqu8 %zmm10,320(%r10,%rax,1) vmovdqu8 %zmm11,384(%r10,%rax,1) vmovdqu8 %zmm12,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_ecaeaEFhspgwivG vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 
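// Decrypt-side hash-key expansion: each quartet of .byte-encoded vpclmulqdq
// instructions multiplies a cached power of H by a broadcast higher power,
// and the POLY2 sequence that follows reduces each 256-bit product back to
// 128 bits before it is stored into the stack-resident key table consumed by
// the big-block loop.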
vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 
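// The final reduce-and-store below completes the stack table of H powers
// (0(%rsp) through 448(%rsp)). The .L_encrypt_big_nblocks loop that follows
// (naming shared with the encrypt path) consumes 48 blocks (768 bytes) per
// iteration: three 16-block CTR strides whose AES rounds are interleaved
// with GHASH multiplies over the ciphertext stashed by the previous stride.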
vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_ecaeaEFhspgwivG: movq $1,%r14 addq $512,%rax subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_icBhFhCkojGgnBc .L_encrypt_big_nblocks_icBhFhCkojGgnBc: cmpb $240,%r15b jae .L_16_blocks_overflow_ApzaumldtosGeir vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ApzaumldtosGeir .L_16_blocks_overflow_ApzaumldtosGeir: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ApzaumldtosGeir: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_ubdpEpmjBbFwdEm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ubdpEpmjBbFwdEm .L_16_blocks_overflow_ubdpEpmjBbFwdEm: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ubdpEpmjBbFwdEm: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_EdBasfawgBetkCB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_EdBasfawgBetkCB .L_16_blocks_overflow_EdBasfawgBetkCB: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_EdBasfawgBetkCB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%rax,1),%zmm17 vmovdqu8 576(%rcx,%rax,1),%zmm19 vmovdqu8 640(%rcx,%rax,1),%zmm20 vmovdqu8 704(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%rax,1) vmovdqu8 %zmm3,576(%r10,%rax,1) vmovdqu8 %zmm4,640(%r10,%rax,1) vmovdqu8 %zmm5,704(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%rax subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_icBhFhCkojGgnBc .L_no_more_big_nblocks_icBhFhCkojGgnBc: cmpq $512,%r8 jae .L_encrypt_32_blocks_icBhFhCkojGgnBc cmpq $256,%r8 jae .L_encrypt_16_blocks_icBhFhCkojGgnBc .L_encrypt_0_blocks_ghash_32_icBhFhCkojGgnBc: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_lGwozrmByuyygbo cmpl $8,%r10d je .L_last_num_blocks_is_8_lGwozrmByuyygbo jb .L_last_num_blocks_is_7_1_lGwozrmByuyygbo cmpl $12,%r10d je .L_last_num_blocks_is_12_lGwozrmByuyygbo jb .L_last_num_blocks_is_11_9_lGwozrmByuyygbo cmpl $15,%r10d je .L_last_num_blocks_is_15_lGwozrmByuyygbo ja .L_last_num_blocks_is_16_lGwozrmByuyygbo cmpl $14,%r10d je .L_last_num_blocks_is_14_lGwozrmByuyygbo jmp .L_last_num_blocks_is_13_lGwozrmByuyygbo .L_last_num_blocks_is_11_9_lGwozrmByuyygbo: cmpl $10,%r10d je .L_last_num_blocks_is_10_lGwozrmByuyygbo ja .L_last_num_blocks_is_11_lGwozrmByuyygbo jmp 
.L_last_num_blocks_is_9_lGwozrmByuyygbo .L_last_num_blocks_is_7_1_lGwozrmByuyygbo: cmpl $4,%r10d je .L_last_num_blocks_is_4_lGwozrmByuyygbo jb .L_last_num_blocks_is_3_1_lGwozrmByuyygbo cmpl $6,%r10d ja .L_last_num_blocks_is_7_lGwozrmByuyygbo je .L_last_num_blocks_is_6_lGwozrmByuyygbo jmp .L_last_num_blocks_is_5_lGwozrmByuyygbo .L_last_num_blocks_is_3_1_lGwozrmByuyygbo: cmpl $2,%r10d ja .L_last_num_blocks_is_3_lGwozrmByuyygbo je .L_last_num_blocks_is_2_lGwozrmByuyygbo .L_last_num_blocks_is_1_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_lClAAkfGiaxqtqb vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_lClAAkfGiaxqtqb .L_16_blocks_overflow_lClAAkfGiaxqtqb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_lClAAkfGiaxqtqb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_AljhFopbDmohEEm subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 
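// Finish the POLY2 reduction of the one-block tail into %xmm14. The
// .L_small_initial_partial_block_* branch below covers a short final block:
// it records the leftover byte count at (%rdx), stashes the in-progress
// output block at 16(%rsi), reduces only the carried products, and XORs the
// partial block into the state so it can be folded in once the block is
// completed by a later call.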
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AljhFopbDmohEEm .L_small_initial_partial_block_AljhFopbDmohEEm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_AljhFopbDmohEEm .L_small_initial_compute_done_AljhFopbDmohEEm: .L_after_reduction_AljhFopbDmohEEm: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_2_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_dxfcclgCzfhujoB vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_dxfcclgCzfhujoB .L_16_blocks_overflow_dxfcclgCzfhujoB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_dxfcclgCzfhujoB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mhEghxGxhmrFGgF 
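// Two remaining blocks and no partial block: clear the stored partial-block
// count, multiply both byte-reflected ciphertext blocks by the last two hash
// keys (the %ymm1 load from 224(%r10)), merge the products carried in
// %zmm24/%zmm25/%zmm26, and reduce as above.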
subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mhEghxGxhmrFGgF .L_small_initial_partial_block_mhEghxGxhmrFGgF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mhEghxGxhmrFGgF: orq %r8,%r8 je .L_after_reduction_mhEghxGxhmrFGgF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mhEghxGxhmrFGgF: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_3_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_GzfdDtolkqgqFel vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_GzfdDtolkqgqFel .L_16_blocks_overflow_GzfdDtolkqgqFel: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_GzfdDtolkqgqFel: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 
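// Masked load of the final ciphertext: {%k1}{z} zeroes every byte past the
// end of the message, so the unused lanes contribute nothing to GHASH and
// the matching masked store later writes back only the valid output bytes.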
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qwrgpmqrxkxvCzs subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qwrgpmqrxkxvCzs .L_small_initial_partial_block_qwrgpmqrxkxvCzs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qwrgpmqrxkxvCzs: orq %r8,%r8 je .L_after_reduction_qwrgpmqrxkxvCzs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qwrgpmqrxkxvCzs: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_4_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_tFlldonsxgiBeEi vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_tFlldonsxgiBeEi .L_16_blocks_overflow_tFlldonsxgiBeEi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_tFlldonsxgiBeEi: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gDoehbqfcmrseCg subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gDoehbqfcmrseCg .L_small_initial_partial_block_gDoehbqfcmrseCg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gDoehbqfcmrseCg: orq %r8,%r8 je .L_after_reduction_gDoehbqfcmrseCg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gDoehbqfcmrseCg: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_5_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_lAbhcGDwivukqtE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_lAbhcGDwivukqtE .L_16_blocks_overflow_lAbhcGDwivukqtE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_lAbhcGDwivukqtE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ewowyEuhltFopkj subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 
.byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ewowyEuhltFopkj .L_small_initial_partial_block_ewowyEuhltFopkj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ewowyEuhltFopkj: orq %r8,%r8 je .L_after_reduction_ewowyEuhltFopkj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ewowyEuhltFopkj: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_6_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_xsoFcrclantxpei vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_xsoFcrclantxpei .L_16_blocks_overflow_xsoFcrclantxpei: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_xsoFcrclantxpei: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 
98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lzfnkiFifvcmjit subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lzfnkiFifvcmjit .L_small_initial_partial_block_lzfnkiFifvcmjit: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lzfnkiFifvcmjit: orq %r8,%r8 je .L_after_reduction_lzfnkiFifvcmjit vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lzfnkiFifvcmjit: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_7_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_xeeduBscFEzvdva vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_xeeduBscFEzvdva .L_16_blocks_overflow_xeeduBscFEzvdva: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_xeeduBscFEzvdva: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DrhotzwvddbqFrj subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 
$2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DrhotzwvddbqFrj .L_small_initial_partial_block_DrhotzwvddbqFrj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DrhotzwvddbqFrj: orq %r8,%r8 je .L_after_reduction_DrhotzwvddbqFrj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DrhotzwvddbqFrj: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_8_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_kBlrofzDjhoFnxv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_kBlrofzDjhoFnxv .L_16_blocks_overflow_kBlrofzDjhoFnxv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_kBlrofzDjhoFnxv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 
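// Editor note (annotation, not generated output): .byte 98,...,220,... here is byte-encoded EVEX vaesenc (opcode 0xDC); the AES rounds proceed in lockstep across both 64-byte counter-block lanes of this 8-block tail path.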
.byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pAdxDizkcbwmjry subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pAdxDizkcbwmjry .L_small_initial_partial_block_pAdxDizkcbwmjry: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pAdxDizkcbwmjry: orq %r8,%r8 je .L_after_reduction_pAdxDizkcbwmjry vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pAdxDizkcbwmjry: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_9_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_whsqDBkDGaknbAC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_whsqDBkDGaknbAC .L_16_blocks_overflow_whsqDBkDGaknbAC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_whsqDBkDGaknbAC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 
98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ranhBavDwnwbdEt subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ranhBavDwnwbdEt .L_small_initial_partial_block_ranhBavDwnwbdEt: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ranhBavDwnwbdEt: orq %r8,%r8 je .L_after_reduction_ranhBavDwnwbdEt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ranhBavDwnwbdEt: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_10_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_GbBbalFokmrvvlx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp 
.L_16_blocks_ok_GbBbalFokmrvvlx .L_16_blocks_overflow_GbBbalFokmrvvlx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_GbBbalFokmrvvlx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vyxjFnxqwhAbeyi subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq 
%zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vyxjFnxqwhAbeyi .L_small_initial_partial_block_vyxjFnxqwhAbeyi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vyxjFnxqwhAbeyi: orq %r8,%r8 je .L_after_reduction_vyxjFnxqwhAbeyi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vyxjFnxqwhAbeyi: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_11_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_ldEsDEbywdmplpt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_ldEsDEbywdmplpt .L_16_blocks_overflow_ldEsDEbywdmplpt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_ldEsDEbywdmplpt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 
98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dwbzkfjluwpFvvF subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dwbzkfjluwpFvvF .L_small_initial_partial_block_dwbzkfjluwpFvvF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dwbzkfjluwpFvvF: orq %r8,%r8 je .L_after_reduction_dwbzkfjluwpFvvF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dwbzkfjluwpFvvF: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_12_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_rAsEscwvsFrjwEn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_rAsEscwvsFrjwEn .L_16_blocks_overflow_rAsEscwvsFrjwEn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_rAsEscwvsFrjwEn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 
98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qqseyimgvencorf subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qqseyimgvencorf .L_small_initial_partial_block_qqseyimgvencorf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 
98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qqseyimgvencorf: orq %r8,%r8 je .L_after_reduction_qqseyimgvencorf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qqseyimgvencorf: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_13_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_yuCmdhwEwEhlsnq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_yuCmdhwEwEhlsnq .L_16_blocks_overflow_yuCmdhwEwEhlsnq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_yuCmdhwEwEhlsnq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 
128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qwyeDgkiECyoEct subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qwyeDgkiECyoEct .L_small_initial_partial_block_qwyeDgkiECyoEct: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 
98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qwyeDgkiECyoEct: orq %r8,%r8 je .L_after_reduction_qwyeDgkiECyoEct vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qwyeDgkiECyoEct: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_14_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_oEwrswoqGyjlsqe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_oEwrswoqGyjlsqe .L_16_blocks_overflow_oEwrswoqGyjlsqe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_oEwrswoqGyjlsqe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 
192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FvgcfpdwFDaojDh subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FvgcfpdwFDaojDh .L_small_initial_partial_block_FvgcfpdwFDaojDh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 
176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FvgcfpdwFDaojDh: orq %r8,%r8 je .L_after_reduction_FvgcfpdwFDaojDh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FvgcfpdwFDaojDh: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_15_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_CtjhmwDvAgBsAry vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_CtjhmwDvAgBsAry .L_16_blocks_overflow_CtjhmwDvAgBsAry: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_CtjhmwDvAgBsAry: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 
98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rertkxjeyegEbAD subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rertkxjeyegEbAD .L_small_initial_partial_block_rertkxjeyegEbAD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 
98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rertkxjeyegEbAD: orq %r8,%r8 je .L_after_reduction_rertkxjeyegEbAD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rertkxjeyegEbAD: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_16_lGwozrmByuyygbo: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_ejwsGBcDyFeryCA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ejwsGBcDyFeryCA .L_16_blocks_overflow_ejwsGBcDyFeryCA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ejwsGBcDyFeryCA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_vqjlBldpifEzCAi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vqjlBldpifEzCAi: 
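// Annotation (editor-inferred; this generated file carries no comments of its
// own): unlike the smaller tail cases, the 16-block case folds the shuffled
// final block (%xmm7) into the GHASH accumulator unconditionally, with no
// "orq %r8,%r8 / je" guard, since this path is only reached with a nonempty
// final block. The .L_last_num_blocks_is_0 path that follows is GHASH-only:
// it multiplies the 16 stack-buffered ciphertext blocks by the precomputed
// hash-key powers with EVEX-encoded VPCLMULQDQ (the raw .byte sequences,
// apparently emitted for assemblers without EVEX VPCLMULQDQ/VAES support)
// and reduces via POLY2 into %xmm14.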
vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vqjlBldpifEzCAi: jmp .L_last_blocks_done_lGwozrmByuyygbo .L_last_num_blocks_is_0_lGwozrmByuyygbo: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_lGwozrmByuyygbo: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_icBhFhCkojGgnBc .L_encrypt_32_blocks_icBhFhCkojGgnBc: cmpb $240,%r15b jae .L_16_blocks_overflow_bqdrbusADEaesxh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_bqdrbusADEaesxh .L_16_blocks_overflow_bqdrbusADEaesxh: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_bqdrbusADEaesxh: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 
98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_jsiAuvqcAwfrdty vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_jsiAuvqcAwfrdty .L_16_blocks_overflow_jsiAuvqcAwfrdty: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jsiAuvqcAwfrdty: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 
98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq 
%xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%rax movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_aldutenGmyuhmFz cmpl $8,%r10d je .L_last_num_blocks_is_8_aldutenGmyuhmFz jb .L_last_num_blocks_is_7_1_aldutenGmyuhmFz cmpl $12,%r10d je .L_last_num_blocks_is_12_aldutenGmyuhmFz jb .L_last_num_blocks_is_11_9_aldutenGmyuhmFz cmpl $15,%r10d je .L_last_num_blocks_is_15_aldutenGmyuhmFz ja .L_last_num_blocks_is_16_aldutenGmyuhmFz cmpl $14,%r10d je .L_last_num_blocks_is_14_aldutenGmyuhmFz jmp .L_last_num_blocks_is_13_aldutenGmyuhmFz .L_last_num_blocks_is_11_9_aldutenGmyuhmFz: cmpl $10,%r10d je .L_last_num_blocks_is_10_aldutenGmyuhmFz ja .L_last_num_blocks_is_11_aldutenGmyuhmFz jmp .L_last_num_blocks_is_9_aldutenGmyuhmFz .L_last_num_blocks_is_7_1_aldutenGmyuhmFz: cmpl $4,%r10d je .L_last_num_blocks_is_4_aldutenGmyuhmFz jb .L_last_num_blocks_is_3_1_aldutenGmyuhmFz cmpl $6,%r10d ja .L_last_num_blocks_is_7_aldutenGmyuhmFz je .L_last_num_blocks_is_6_aldutenGmyuhmFz jmp .L_last_num_blocks_is_5_aldutenGmyuhmFz .L_last_num_blocks_is_3_1_aldutenGmyuhmFz: cmpl $2,%r10d ja .L_last_num_blocks_is_3_aldutenGmyuhmFz je .L_last_num_blocks_is_2_aldutenGmyuhmFz .L_last_num_blocks_is_1_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_kounvuokEjmfgDl vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_kounvuokEjmfgDl .L_16_blocks_overflow_kounvuokEjmfgDl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_kounvuokEjmfgDl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq 
%r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_myyjGGFduxDnmrl subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_myyjGGFduxDnmrl .L_small_initial_partial_block_myyjGGFduxDnmrl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_myyjGGFduxDnmrl .L_small_initial_compute_done_myyjGGFduxDnmrl: .L_after_reduction_myyjGGFduxDnmrl: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_2_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_GkcjorkhgDBFApE vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_GkcjorkhgDBFApE .L_16_blocks_overflow_GkcjorkhgDBFApE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_GkcjorkhgDBFApE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 
0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_luGrBrcBwGbkypf subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_luGrBrcBwGbkypf .L_small_initial_partial_block_luGrBrcBwGbkypf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_luGrBrcBwGbkypf: orq %r8,%r8 je .L_after_reduction_luGrBrcBwGbkypf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_luGrBrcBwGbkypf: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_3_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_GlGoAfCtaxDnccC vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_GlGoAfCtaxDnccC .L_16_blocks_overflow_GlGoAfCtaxDnccC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_GlGoAfCtaxDnccC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 
32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hjFElydehDprmun subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hjFElydehDprmun .L_small_initial_partial_block_hjFElydehDprmun: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hjFElydehDprmun: orq %r8,%r8 je .L_after_reduction_hjFElydehDprmun vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hjFElydehDprmun: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_4_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_zwfpgGyijsBkpeE vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_zwfpgGyijsBkpeE .L_16_blocks_overflow_zwfpgGyijsBkpeE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_zwfpgGyijsBkpeE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ftaDveFCagABhCd subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ftaDveFCagABhCd 
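// Annotation (editor-inferred): the _small_initial_partial_block_ variants
// below differ from the full-block paths only in bookkeeping: the residual
// byte count is stored through %rdx instead of being zeroed, the final
// (partial) ciphertext block in %xmm11 is stashed at 16(%rsi), and the GHASH
// fold appears to use hash-key powers shifted by one table slot so that the
// still-incomplete block is not multiplied in yet.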
.L_small_initial_partial_block_ftaDveFCagABhCd: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ftaDveFCagABhCd: orq %r8,%r8 je .L_after_reduction_ftaDveFCagABhCd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ftaDveFCagABhCd: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_5_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_CizAwbkEgozyasa vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_CizAwbkEgozyasa .L_16_blocks_overflow_CizAwbkEgozyasa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_CizAwbkEgozyasa: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 
98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uvigeFCkFhxrjol subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uvigeFCkFhxrjol .L_small_initial_partial_block_uvigeFCkFhxrjol: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uvigeFCkFhxrjol: orq %r8,%r8 je .L_after_reduction_uvigeFCkFhxrjol vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_uvigeFCkFhxrjol: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_6_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_yuzbpkwFyzjuBAz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_yuzbpkwFyzjuBAz .L_16_blocks_overflow_yuzbpkwFyzjuBAz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_yuzbpkwFyzjuBAz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 
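// Annotation (editor-inferred): %k1 was loaded at the head of this path from
// byte64_len_to_mask_table, indexed by the residual length, so the masked
// vmovdqu8 ...{%k1}{z} load below and the matching masked store touch only
// the live tail bytes of the message.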
vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mGnxEwEsoAvgkoh subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mGnxEwEsoAvgkoh .L_small_initial_partial_block_mGnxEwEsoAvgkoh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 
240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mGnxEwEsoAvgkoh: orq %r8,%r8 je .L_after_reduction_mGnxEwEsoAvgkoh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mGnxEwEsoAvgkoh: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_7_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_fDccaFllCyjzgaw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_fDccaFllCyjzgaw .L_16_blocks_overflow_fDccaFllCyjzgaw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_fDccaFllCyjzgaw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_awcpyfsBbqeAyhp subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_awcpyfsBbqeAyhp .L_small_initial_partial_block_awcpyfsBbqeAyhp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_awcpyfsBbqeAyhp: orq %r8,%r8 je .L_after_reduction_awcpyfsBbqeAyhp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_awcpyfsBbqeAyhp: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_8_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_yuxjCAwGGjlocDt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_yuxjCAwGGjlocDt .L_16_blocks_overflow_yuxjCAwGGjlocDt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_yuxjCAwGGjlocDt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 
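// Annotation (editor-inferred): the cmpl/jae test above guards counter-block
// generation. The counters are kept byte-reflected, so a plain vpaddd is
// only safe while the low counter byte (tracked in %r15) cannot carry; the
// overflow path instead vpshufb-swaps via the mask in %zmm29, adds
// ddq_add_1234/ddq_add_4444 in native byte order, and swaps back.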
vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tCmmipfvAEinBtG subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tCmmipfvAEinBtG .L_small_initial_partial_block_tCmmipfvAEinBtG: 
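// The 8-block tail ends in a partial block: record the leftover byte count
// at (%rdx), save the last ciphertext block at 16(%rsi), and fold only the
// complete blocks into the hash, so the key-power offsets below sit one
// 16-byte slot higher than in the full-block branch above.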
movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tCmmipfvAEinBtG: orq %r8,%r8 je .L_after_reduction_tCmmipfvAEinBtG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tCmmipfvAEinBtG: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_9_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_FrborCeuBByFkga vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_FrborCeuBByFkga .L_16_blocks_overflow_FrborCeuBByFkga: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_FrborCeuBByFkga: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 
98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rhmklrqdhsjaixG subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rhmklrqdhsjaixG .L_small_initial_partial_block_rhmklrqdhsjaixG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rhmklrqdhsjaixG: orq %r8,%r8 je .L_after_reduction_rhmklrqdhsjaixG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rhmklrqdhsjaixG: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_10_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_uqpvEzAtlprmDsg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_uqpvEzAtlprmDsg .L_16_blocks_overflow_uqpvEzAtlprmDsg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_uqpvEzAtlprmDsg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 
%ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yslffbaddFCEqwA subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yslffbaddFCEqwA .L_small_initial_partial_block_yslffbaddFCEqwA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yslffbaddFCEqwA: orq %r8,%r8 je .L_after_reduction_yslffbaddFCEqwA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yslffbaddFCEqwA: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_11_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_wyBrnxyfcdFguiF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_wyBrnxyfcdFguiF .L_16_blocks_overflow_wyBrnxyfcdFguiF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_wyBrnxyfcdFguiF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_axnFjCbcEhxjDmF subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 
$2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_axnFjCbcEhxjDmF .L_small_initial_partial_block_axnFjCbcEhxjDmF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_axnFjCbcEhxjDmF: orq %r8,%r8 je .L_after_reduction_axnFjCbcEhxjDmF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_axnFjCbcEhxjDmF: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_12_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_nbfsGzmFjniAhpc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_nbfsGzmFjniAhpc .L_16_blocks_overflow_nbfsGzmFjniAhpc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_nbfsGzmFjniAhpc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 
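// 12-block tail, same interleave as the paths above: AES rounds on
// zmm0/zmm3/zmm4 alternate with GHASH accumulation over the saved
// ciphertext at 896(%rsp)/960(%rsp) and the key powers at
// 128(%rsp,%rbx,1)/192(%rsp,%rbx,1).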
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nmuwjreDfxCetjh subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nmuwjreDfxCetjh .L_small_initial_partial_block_nmuwjreDfxCetjh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nmuwjreDfxCetjh: orq %r8,%r8 je .L_after_reduction_nmuwjreDfxCetjh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nmuwjreDfxCetjh: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_13_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_BlpixnjkGtBtzBl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_BlpixnjkGtBtzBl .L_16_blocks_overflow_BlpixnjkGtBtzBl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_BlpixnjkGtBtzBl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 
98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FdqyplwEjyoxvwf subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 
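// Final 128-bit reduction, continued below: two carry-less multiplies by
// POLY2 plus 4-byte shifts fold the 256-bit GHASH product back under the
// field polynomial, and vpternlogq $0x96 (a three-way XOR) accumulates
// the result into the running hash in %xmm14.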
vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FdqyplwEjyoxvwf .L_small_initial_partial_block_FdqyplwEjyoxvwf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FdqyplwEjyoxvwf: orq %r8,%r8 je .L_after_reduction_FdqyplwEjyoxvwf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FdqyplwEjyoxvwf: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_14_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_nlkisqljGgnlewr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_nlkisqljGgnlewr .L_16_blocks_overflow_nlkisqljGgnlewr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_nlkisqljGgnlewr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ByszoDfuCgvEska subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ByszoDfuCgvEska .L_small_initial_partial_block_ByszoDfuCgvEska: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ByszoDfuCgvEska: orq %r8,%r8 je .L_after_reduction_ByszoDfuCgvEska vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ByszoDfuCgvEska: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_15_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_FewkqxwDmrjetmG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_FewkqxwDmrjetmG .L_16_blocks_overflow_FewkqxwDmrjetmG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_FewkqxwDmrjetmG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 
98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jjbxCtvydaGqepC subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 
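// POLY2 reduction for the 15-block tail; identical to the sequences above.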
.byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jjbxCtvydaGqepC .L_small_initial_partial_block_jjbxCtvydaGqepC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jjbxCtvydaGqepC: orq %r8,%r8 je .L_after_reduction_jjbxCtvydaGqepC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jjbxCtvydaGqepC: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_16_aldutenGmyuhmFz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_hEoxzbghGBmpbpw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hEoxzbghGBmpbpw .L_16_blocks_overflow_hEoxzbghGBmpbpw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hEoxzbghGBmpbpw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 
960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_usEFihDgqghhogg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 
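// Horizontal fold: XOR the 128-bit lanes of the high and low halves of the
// product down to single xmm values before the POLY2 reduction. Note that
// the 16-block tail always goes through the partial-block bookkeeping
// (leftover length at (%rdx), last block at 16(%rsi)); it has no separate
// full-block branch.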
vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_usEFihDgqghhogg: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_usEFihDgqghhogg: jmp .L_last_blocks_done_aldutenGmyuhmFz .L_last_num_blocks_is_0_aldutenGmyuhmFz: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_aldutenGmyuhmFz: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_icBhFhCkojGgnBc .L_encrypt_16_blocks_icBhFhCkojGgnBc: cmpb $240,%r15b jae .L_16_blocks_overflow_xlvtosuhBBytzsd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_xlvtosuhBBytzsd .L_16_blocks_overflow_xlvtosuhBBytzsd: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_xlvtosuhBBytzsd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%rax movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_CqqjsGobDovpiom cmpl $8,%r10d je .L_last_num_blocks_is_8_CqqjsGobDovpiom jb 
.L_last_num_blocks_is_7_1_CqqjsGobDovpiom cmpl $12,%r10d je .L_last_num_blocks_is_12_CqqjsGobDovpiom jb .L_last_num_blocks_is_11_9_CqqjsGobDovpiom cmpl $15,%r10d je .L_last_num_blocks_is_15_CqqjsGobDovpiom ja .L_last_num_blocks_is_16_CqqjsGobDovpiom cmpl $14,%r10d je .L_last_num_blocks_is_14_CqqjsGobDovpiom jmp .L_last_num_blocks_is_13_CqqjsGobDovpiom .L_last_num_blocks_is_11_9_CqqjsGobDovpiom: cmpl $10,%r10d je .L_last_num_blocks_is_10_CqqjsGobDovpiom ja .L_last_num_blocks_is_11_CqqjsGobDovpiom jmp .L_last_num_blocks_is_9_CqqjsGobDovpiom .L_last_num_blocks_is_7_1_CqqjsGobDovpiom: cmpl $4,%r10d je .L_last_num_blocks_is_4_CqqjsGobDovpiom jb .L_last_num_blocks_is_3_1_CqqjsGobDovpiom cmpl $6,%r10d ja .L_last_num_blocks_is_7_CqqjsGobDovpiom je .L_last_num_blocks_is_6_CqqjsGobDovpiom jmp .L_last_num_blocks_is_5_CqqjsGobDovpiom .L_last_num_blocks_is_3_1_CqqjsGobDovpiom: cmpl $2,%r10d ja .L_last_num_blocks_is_3_CqqjsGobDovpiom je .L_last_num_blocks_is_2_CqqjsGobDovpiom .L_last_num_blocks_is_1_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_iCcBbEaCnnBtiGz vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_iCcBbEaCnnBtiGz .L_16_blocks_overflow_iCcBbEaCnnBtiGz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_iCcBbEaCnnBtiGz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,8,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq 
$4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_himCBxsCzdjqdtp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_himCBxsCzdjqdtp .L_small_initial_partial_block_himCBxsCzdjqdtp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_himCBxsCzdjqdtp .L_small_initial_compute_done_himCBxsCzdjqdtp: .L_after_reduction_himCBxsCzdjqdtp: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_2_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_spdFufbAAGcAxFf vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_spdFufbAAGcAxFf .L_16_blocks_overflow_spdFufbAAGcAxFf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_spdFufbAAGcAxFf: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 
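// Note: dispatch into the .L_last_num_blocks_is_* arms above is a branch tree
// on the remaining block count (1..16). Each arm handles a tail of N 16-byte
// blocks: byte64_len_to_mask_table is indexed by the remaining byte count
// (scaled by 8) to build the load/store mask in %k1, and the {%k1}{z}
// zero-masking keeps the partial block from reading or writing past the
// message buffer.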
vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,40,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vdGxihcuFDvcDGx subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vdGxihcuFDvcDGx .L_small_initial_partial_block_vdGxihcuFDvcDGx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vdGxihcuFDvcDGx: orq %r8,%r8 je .L_after_reduction_vdGxihcuFDvcDGx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vdGxihcuFDvcDGx: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_3_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_sBAazunogzDjqho vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_sBAazunogzDjqho .L_16_blocks_overflow_sBAazunogzDjqho: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_sBAazunogzDjqho: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tldtpncdejgAGjh subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tldtpncdejgAGjh .L_small_initial_partial_block_tldtpncdejgAGjh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tldtpncdejgAGjh: orq %r8,%r8 je .L_after_reduction_tldtpncdejgAGjh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tldtpncdejgAGjh: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_4_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_sekyjhofosAtkyB vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_sekyjhofosAtkyB .L_16_blocks_overflow_sekyjhofosAtkyB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_sekyjhofosAtkyB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 
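// Note on counter handling: %r15b tracks the low byte of the big-endian
// counter. While adding N blocks cannot carry out of that byte (the
// cmpl $256-N,%r15d style checks, e.g. $252 for four blocks), counters are
// bumped in place with plain vpaddd; otherwise the .L_16_blocks_overflow_*
// path byte-swaps with vpshufb, adds the ddq_add_1234/ddq_add_4444 constants,
// and swaps back.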
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wkomnalwByedats subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wkomnalwByedats .L_small_initial_partial_block_wkomnalwByedats: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wkomnalwByedats: orq %r8,%r8 je .L_after_reduction_wkomnalwByedats vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wkomnalwByedats: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_5_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_zdkGskjaniDljeq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_zdkGskjaniDljeq .L_16_blocks_overflow_zdkGskjaniDljeq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_zdkGskjaniDljeq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 
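// Note: leaq 80(%rsi),%r10 appears to point into the precomputed table of
// powers of H; each tail arm multiplies its byte-reflected ciphertext blocks
// by the matching powers (higher offsets such as 240(%r10) appear to hold the
// lowest powers) so every partial product can be folded with one final
// reduction.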
.byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nCkgxpzwqEAtDfb subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nCkgxpzwqEAtDfb .L_small_initial_partial_block_nCkgxpzwqEAtDfb: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 
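// Note: each arm has two exits. With a whole-block tail, zero is stored to
// (%rdx) and the hash is reduced immediately; .L_small_initial_partial_block_*
// instead records the leftover byte count at (%rdx) and the last ciphertext
// block at 16(%rsi), and the masked partial block (%xmm7) is XORed into the
// hash state so its multiply by H can be completed later.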
vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nCkgxpzwqEAtDfb: orq %r8,%r8 je .L_after_reduction_nCkgxpzwqEAtDfb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nCkgxpzwqEAtDfb: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_6_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_mrylAcnDjuqklnd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_mrylAcnDjuqklnd .L_16_blocks_overflow_mrylAcnDjuqklnd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_mrylAcnDjuqklnd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq 
%xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dtDgucpjyaambao subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dtDgucpjyaambao .L_small_initial_partial_block_dtDgucpjyaambao: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dtDgucpjyaambao: orq %r8,%r8 je .L_after_reduction_dtDgucpjyaambao vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dtDgucpjyaambao: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_7_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_ektccsvjwlnFwnw vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ektccsvjwlnFwnw .L_16_blocks_overflow_ektccsvjwlnFwnw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ektccsvjwlnFwnw: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rbfqryodaBgimfn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 
98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rbfqryodaBgimfn .L_small_initial_partial_block_rbfqryodaBgimfn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rbfqryodaBgimfn: orq %r8,%r8 je .L_after_reduction_rbfqryodaBgimfn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rbfqryodaBgimfn: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_8_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_GGmuDhkjBtqxcEd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_GGmuDhkjBtqxcEd .L_16_blocks_overflow_GGmuDhkjBtqxcEd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_GGmuDhkjBtqxcEd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 
98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aapCFFxCFiAoabs subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aapCFFxCFiAoabs .L_small_initial_partial_block_aapCFFxCFiAoabs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 
98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aapCFFxCFiAoabs: orq %r8,%r8 je .L_after_reduction_aapCFFxCFiAoabs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aapCFFxCFiAoabs: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_9_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_lDwlixsAzhAgDkG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_lDwlixsAzhAgDkG .L_16_blocks_overflow_lDwlixsAzhAgDkG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_lDwlixsAzhAgDkG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq 
$0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ixzDxvojEApEnCt subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ixzDxvojEApEnCt .L_small_initial_partial_block_ixzDxvojEApEnCt: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
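// Note: the vextracti64x4 $1 / vpxorq / vextracti32x4 $1 / vpxorq pattern
// seen here is the standard horizontal fold, collapsing a 512-bit accumulator
// to 256 and then to 128 bits; it recurs before every final reduction.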
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ixzDxvojEApEnCt: orq %r8,%r8 je .L_after_reduction_ixzDxvojEApEnCt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ixzDxvojEApEnCt: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_10_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_BbbzknmqtuDuEfg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_BbbzknmqtuDuEfg .L_16_blocks_overflow_BbbzknmqtuDuEfg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_BbbzknmqtuDuEfg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq 
%zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ocgDwclfceuanoy subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ocgDwclfceuanoy .L_small_initial_partial_block_ocgDwclfceuanoy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ocgDwclfceuanoy: orq %r8,%r8 je .L_after_reduction_ocgDwclfceuanoy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ocgDwclfceuanoy: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_11_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_jatgakEfrDmqCyG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_jatgakEfrDmqCyG .L_16_blocks_overflow_jatgakEfrDmqCyG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_jatgakEfrDmqCyG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 
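/* Editor's annotation (interpretation, not generated output): the
 * vextracti64x4/vextracti32x4 + vpxorq pairs that follow fold the
 * 512-bit GHASH accumulators down to a single 128-bit lane, and the
 * .byte sequences are EVEX-encoded vpclmulqdq instructions (opcode
 * 0x44) that appear to perform the final reduction against the POLY2
 * constant loaded into %xmm16 above. */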
vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tklDcEsdEdnDloA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tklDcEsdEdnDloA .L_small_initial_partial_block_tklDcEsdEdnDloA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tklDcEsdEdnDloA: orq %r8,%r8 je .L_after_reduction_tklDcEsdEdnDloA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tklDcEsdEdnDloA: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_12_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_tovGfhABebkuFEt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_tovGfhABebkuFEt .L_16_blocks_overflow_tovGfhABebkuFEt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_tovGfhABebkuFEt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 
$1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EEeschrlAysrrgg subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EEeschrlAysrrgg .L_small_initial_partial_block_EEeschrlAysrrgg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
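/* Editor's annotation (interpretation, not generated output): the
 * .L_last_num_blocks_is_13 tail path below follows the same template
 * as the other .L_last_num_blocks_is_N cases: a store/load mask for
 * the final partial 64-byte chunk is fetched from
 * byte64_len_to_mask_table into %k1, cmpl $243,%r15d (243 = 256 - 13)
 * appears to test whether adding the block count wraps the low
 * counter byte (choosing the cheap vpaddd path or the byte-swapped
 * overflow path), and the AES rounds (vaesenc/vaesenclast emitted as
 * .byte with opcodes 0xdc/0xdd) are interleaved with vpclmulqdq GHASH
 * multiplies against the precomputed hash-key powers on the stack. */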
.L_small_initial_compute_done_EEeschrlAysrrgg: orq %r8,%r8 je .L_after_reduction_EEeschrlAysrrgg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EEeschrlAysrrgg: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_13_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_ChCrwqCswoEpicz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_ChCrwqCswoEpicz .L_16_blocks_overflow_ChCrwqCswoEpicz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_ChCrwqCswoEpicz: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iBgxbAnxnejeaAD subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iBgxbAnxnejeaAD .L_small_initial_partial_block_iBgxbAnxnejeaAD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 
$1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iBgxbAnxnejeaAD: orq %r8,%r8 je .L_after_reduction_iBgxbAnxnejeaAD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iBgxbAnxnejeaAD: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_14_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_GzibzgsizEbkyAE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_GzibzgsizEbkyAE .L_16_blocks_overflow_GzibzgsizEbkyAE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_GzibzgsizEbkyAE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq 
$0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ulArrmByoEAEezF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ulArrmByoEAEezF .L_small_initial_partial_block_ulArrmByoEAEezF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 
98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ulArrmByoEAEezF: orq %r8,%r8 je .L_after_reduction_ulArrmByoEAEezF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ulArrmByoEAEezF: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_15_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_DExqfkaBzzhxtrd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_DExqfkaBzzhxtrd .L_16_blocks_overflow_DExqfkaBzzhxtrd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_DExqfkaBzzhxtrd: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 
98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zqfGgrfeCzzwkzB subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zqfGgrfeCzzwkzB .L_small_initial_partial_block_zqfGgrfeCzzwkzB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zqfGgrfeCzzwkzB: orq %r8,%r8 je .L_after_reduction_zqfGgrfeCzzwkzB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zqfGgrfeCzzwkzB: jmp .L_last_blocks_done_CqqjsGobDovpiom .L_last_num_blocks_is_16_CqqjsGobDovpiom: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_fanaekDAulfkhcb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_fanaekDAulfkhcb .L_16_blocks_overflow_fanaekDAulfkhcb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_fanaekDAulfkhcb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_pCDjmBApGDgFGhw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 
98,227,93,64,68,217,0
vpternlogq $0x96,%zmm0,%zmm17,%zmm8
vpternlogq $0x96,%zmm3,%zmm19,%zmm22
.byte 98,227,93,64,68,201,1
.byte 98,227,93,64,68,217,16
vpternlogq $0x96,%zmm4,%zmm17,%zmm30
vpternlogq $0x96,%zmm5,%zmm19,%zmm31
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,85,64,68,225,1
.byte 98,243,85,64,68,233,16
.byte 98,243,85,64,68,193,17
.byte 98,243,85,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpxorq %zmm31,%zmm5,%zmm5
vpxorq %zmm8,%zmm0,%zmm0
vpxorq %zmm22,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_pCDjmBApGDgFGhw:
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_pCDjmBApGDgFGhw:
jmp .L_last_blocks_done_CqqjsGobDovpiom
.L_last_num_blocks_is_0_CqqjsGobDovpiom:
vmovdqa64 1280(%rsp),%zmm13
vmovdqu64 512(%rsp),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 1344(%rsp),%zmm13
vmovdqu64 576(%rsp),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vmovdqa64 1408(%rsp),%zmm13
vmovdqu64 640(%rsp),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 1472(%rsp),%zmm13
vmovdqu64 704(%rsp),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm4
.byte 98,147,93,8,68,193,1
vpslldq $8,%xmm0,%xmm0
vpxorq %xmm0,%xmm25,%xmm0
.byte 98,243,93,8,68,216,0
vpsrldq $4,%xmm3,%xmm3
.byte 98,115,93,8,68,240,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm3,%xmm14
.L_last_blocks_done_CqqjsGobDovpiom:
vpshufb %xmm29,%xmm2,%xmm2
jmp .L_ghash_done_icBhFhCkojGgnBc
.L_message_below_32_blocks_icBhFhCkojGgnBc:
subq $256,%r8
addq $256,%rax
movl %r8d,%r10d
leaq 80(%rsi),%r12
testq %r14,%r14
jnz .L_skip_hkeys_precomputation_lurmstfAeByrDpz
vmovdqu64 640(%rsp),%zmm3
vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3
vmovdqu64 576(%rsp),%zmm4
vmovdqu64 512(%rsp),%zmm5
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq
%zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,448(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,384(%rsp)
.byte 98,243,93,72,68,243,17
.byte 98,243,93,72,68,251,0
.byte 98,115,93,72,68,211,1
.byte 98,243,93,72,68,227,16
vpxorq %zmm10,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm10
vpslldq $8,%zmm4,%zmm4
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm4,%zmm4
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,252,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm4,%zmm4
.byte 98,243,45,72,68,252,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,228,16
vpslldq $4,%zmm4,%zmm4
vpternlogq $0x96,%zmm7,%zmm6,%zmm4
vmovdqu64 %zmm4,320(%rsp)
.byte 98,243,85,72,68,243,17
.byte 98,243,85,72,68,251,0
.byte 98,115,85,72,68,211,1
.byte 98,243,85,72,68,235,16
vpxorq %zmm10,%zmm5,%zmm5
vpsrldq $8,%zmm5,%zmm10
vpslldq $8,%zmm5,%zmm5
vpxorq %zmm10,%zmm6,%zmm6
vpxorq %zmm7,%zmm5,%zmm5
vmovdqu64 POLY2(%rip),%zmm10
.byte 98,243,45,72,68,253,1
vpslldq $8,%zmm7,%zmm7
vpxorq %zmm7,%zmm5,%zmm5
.byte 98,243,45,72,68,253,0
vpsrldq $4,%zmm7,%zmm7
.byte 98,243,45,72,68,237,16
vpslldq $4,%zmm5,%zmm5
vpternlogq $0x96,%zmm7,%zmm6,%zmm5
vmovdqu64 %zmm5,256(%rsp)
.L_skip_hkeys_precomputation_lurmstfAeByrDpz:
movq $1,%r14
andl $~15,%r10d
movl $512,%ebx
subl %r10d,%ebx
movl %r8d,%r10d
addl $15,%r10d
shrl $4,%r10d
je .L_last_num_blocks_is_0_kpvFtqCzpagsbmy
cmpl $8,%r10d
je .L_last_num_blocks_is_8_kpvFtqCzpagsbmy
jb .L_last_num_blocks_is_7_1_kpvFtqCzpagsbmy
cmpl $12,%r10d
je .L_last_num_blocks_is_12_kpvFtqCzpagsbmy
jb .L_last_num_blocks_is_11_9_kpvFtqCzpagsbmy
cmpl $15,%r10d
je .L_last_num_blocks_is_15_kpvFtqCzpagsbmy
ja .L_last_num_blocks_is_16_kpvFtqCzpagsbmy
cmpl $14,%r10d
je .L_last_num_blocks_is_14_kpvFtqCzpagsbmy
jmp .L_last_num_blocks_is_13_kpvFtqCzpagsbmy
.L_last_num_blocks_is_11_9_kpvFtqCzpagsbmy:
cmpl $10,%r10d
je .L_last_num_blocks_is_10_kpvFtqCzpagsbmy
ja .L_last_num_blocks_is_11_kpvFtqCzpagsbmy
jmp .L_last_num_blocks_is_9_kpvFtqCzpagsbmy
.L_last_num_blocks_is_7_1_kpvFtqCzpagsbmy:
cmpl $4,%r10d
je .L_last_num_blocks_is_4_kpvFtqCzpagsbmy
jb .L_last_num_blocks_is_3_1_kpvFtqCzpagsbmy
cmpl $6,%r10d
ja .L_last_num_blocks_is_7_kpvFtqCzpagsbmy
je .L_last_num_blocks_is_6_kpvFtqCzpagsbmy
jmp .L_last_num_blocks_is_5_kpvFtqCzpagsbmy
.L_last_num_blocks_is_3_1_kpvFtqCzpagsbmy:
cmpl $2,%r10d
ja .L_last_num_blocks_is_3_kpvFtqCzpagsbmy
je .L_last_num_blocks_is_2_kpvFtqCzpagsbmy
.L_last_num_blocks_is_1_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_erjqEcdgnsabCAp
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_erjqEcdgnsabCAp
.L_16_blocks_overflow_erjqEcdgnsabCAp:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_erjqEcdgnsabCAp:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %xmm29,%xmm17,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_tihlhrngdnEcfCn
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_tihlhrngdnEcfCn
.L_small_initial_partial_block_tihlhrngdnEcfCn:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm0
.byte 98,147,125,8,68,217,1
vpslldq $8,%xmm3,%xmm3
vpxorq %xmm3,%xmm25,%xmm3
.byte 98,243,125,8,68,227,0
vpsrldq $4,%xmm4,%xmm4
.byte 98,115,125,8,68,243,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm4,%xmm14
vpxorq %xmm7,%xmm14,%xmm14
jmp .L_after_reduction_tihlhrngdnEcfCn
.L_small_initial_compute_done_tihlhrngdnEcfCn:
.L_after_reduction_tihlhrngdnEcfCn:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
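/* Editor's annotation (interpretation, not generated output): the
 * two-block tail below mirrors the one-block case above, but operates
 * on %ymm registers for the two counter blocks (cmpl $254,%r15d is
 * 256 - 2 for the counter-wrap test) and uses the %k1 mask for a
 * 32-byte load/store of the possibly partial ciphertext. */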
.L_last_num_blocks_is_2_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $254,%r15d
jae .L_16_blocks_overflow_AibviGpsltwhwck
vpaddd %ymm28,%ymm2,%ymm0
jmp .L_16_blocks_ok_AibviGpsltwhwck
.L_16_blocks_overflow_AibviGpsltwhwck:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %ymm29,%ymm0,%ymm0
.L_16_blocks_ok_AibviGpsltwhwck:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %ymm30,%ymm0,%ymm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,40,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,40,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,40,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,40,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte 98,146,125,40,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,40,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,40,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,40,220,199
.byte 98,146,125,40,221,198
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %ymm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %ymm29,%ymm17,%ymm17
vextracti32x4 $1,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (2 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_CGytiedGuwlshAl
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_CGytiedGuwlshAl
.L_small_initial_partial_block_CGytiedGuwlshAl:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
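/* Editor's annotation (interpretation, not generated output): the
 * shared epilogue that follows XORs the partial vpclmulqdq products
 * into the GHASH accumulators, folds 512 bits down to 128, and
 * reduces modulo the GHASH polynomial via the POLY2 constant into
 * %xmm14. */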
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_CGytiedGuwlshAl:
orq %r8,%r8
je .L_after_reduction_CGytiedGuwlshAl
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_CGytiedGuwlshAl:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_3_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $253,%r15d
jae .L_16_blocks_overflow_cwyoDiaxggCofzt
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_cwyoDiaxggCofzt
.L_16_blocks_overflow_cwyoDiaxggCofzt:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_cwyoDiaxggCofzt:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $2,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vextracti32x4 $2,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (3 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_khdhzwEsobgrlgi
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_khdhzwEsobgrlgi
.L_small_initial_partial_block_khdhzwEsobgrlgi:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_khdhzwEsobgrlgi:
orq %r8,%r8
je .L_after_reduction_khdhzwEsobgrlgi
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_khdhzwEsobgrlgi:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_4_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $252,%r15d
jae .L_16_blocks_overflow_fqeFwlbvdGyejoA
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_fqeFwlbvdGyejoA
.L_16_blocks_overflow_fqeFwlbvdGyejoA:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_fqeFwlbvdGyejoA:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $3,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vextracti32x4 $3,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (4 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_bzlErbuuhovEdpE
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_bzlErbuuhovEdpE
.L_small_initial_partial_block_bzlErbuuhovEdpE:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_bzlErbuuhovEdpE:
orq %r8,%r8
je .L_after_reduction_bzlErbuuhovEdpE
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_bzlErbuuhovEdpE:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_5_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $251,%r15d
jae .L_16_blocks_overflow_cjnavuxfcgGzzCb
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %xmm27,%xmm0,%xmm3
jmp .L_16_blocks_ok_cjnavuxfcgGzzCb
.L_16_blocks_overflow_cjnavuxfcgGzzCb:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %xmm29,%xmm3,%xmm3
.L_16_blocks_ok_cjnavuxfcgGzzCb:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %xmm30,%xmm3,%xmm3
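// Added note: each vbroadcastf64x2 N(%rdi) below broadcasts AES round key
// N/16 to all lanes; the .byte 98,...,220,... / 98,...,221,... sequences are
// EVEX-encoded vaesenc / vaesenclast applied to the counter blocks,
// interleaved with vpclmulqdq GHASH work on the blocks stashed at
// (%rsp,%rbx,1).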
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,8,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %xmm19,%xmm3,%xmm3
vextracti32x4 $0,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %xmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %xmm29,%xmm19,%xmm19
vextracti32x4 $0,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (5 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_eaEmrzsvCBDlnpC
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_eaEmrzsvCBDlnpC
.L_small_initial_partial_block_eaEmrzsvCBDlnpC:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_eaEmrzsvCBDlnpC:
orq %r8,%r8
je .L_after_reduction_eaEmrzsvCBDlnpC
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_eaEmrzsvCBDlnpC:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_6_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $250,%r15d
jae .L_16_blocks_overflow_DndbknmyrzkriDg
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %ymm27,%ymm0,%ymm3
jmp .L_16_blocks_ok_DndbknmyrzkriDg
.L_16_blocks_overflow_DndbknmyrzkriDg:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %ymm29,%ymm3,%ymm3
.L_16_blocks_ok_DndbknmyrzkriDg:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %ymm30,%ymm3,%ymm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,40,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %ymm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %ymm29,%ymm19,%ymm19
vextracti32x4 $1,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (6 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_geoBBGllnatlCqq
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 160(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 224(%r10),%ymm1
.byte 98,243,101,32,68,225,1
.byte 98,243,101,32,68,233,16
.byte 98,243,101,32,68,193,17
.byte 98,243,101,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_geoBBGllnatlCqq
.L_small_initial_partial_block_geoBBGllnatlCqq:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_geoBBGllnatlCqq:
orq %r8,%r8
je .L_after_reduction_geoBBGllnatlCqq
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_geoBBGllnatlCqq:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_7_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $249,%r15d
jae .L_16_blocks_overflow_jtGaGqFaokuwcFo
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
jmp .L_16_blocks_ok_jtGaGqFaokuwcFo
.L_16_blocks_overflow_jtGaGqFaokuwcFo:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
.L_16_blocks_ok_jtGaGqFaokuwcFo:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti32x4 $2,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vextracti32x4 $2,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (7 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_xkeqvjpCBEjlkGx
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 144(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,101,64,68,225,1
.byte 98,243,101,64,68,233,16
.byte 98,243,101,64,68,193,17
.byte 98,243,101,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_xkeqvjpCBEjlkGx
.L_small_initial_partial_block_xkeqvjpCBEjlkGx:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 160(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 224(%r10),%ymm1
.byte 98,243,101,32,68,225,1
.byte 98,243,101,32,68,233,16
.byte 98,243,101,32,68,193,17
.byte 98,243,101,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_xkeqvjpCBEjlkGx:
orq %r8,%r8
je .L_after_reduction_xkeqvjpCBEjlkGx
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_xkeqvjpCBEjlkGx:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_8_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $248,%r15d
jae .L_16_blocks_overflow_BCegvazduGiwBqv
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
jmp .L_16_blocks_ok_BCegvazduGiwBqv
.L_16_blocks_overflow_BCegvazduGiwBqv:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
.L_16_blocks_ok_BCegvazduGiwBqv:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti32x4 $3,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vextracti32x4 $3,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (8 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_fjDnDwdgfswBjwp
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 128(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 192(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_fjDnDwdgfswBjwp
.L_small_initial_partial_block_fjDnDwdgfswBjwp:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 144(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,101,64,68,225,1
.byte 98,243,101,64,68,233,16
.byte 98,243,101,64,68,193,17
.byte 98,243,101,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_fjDnDwdgfswBjwp:
orq %r8,%r8
je .L_after_reduction_fjDnDwdgfswBjwp
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_fjDnDwdgfswBjwp:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_9_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $247,%r15d
jae .L_16_blocks_overflow_nGczFFdvDDdbdAl
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %xmm27,%xmm3,%xmm4
jmp .L_16_blocks_ok_nGczFFdvDDdbdAl
.L_16_blocks_overflow_nGczFFdvDDdbdAl:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %xmm29,%xmm4,%xmm4
.L_16_blocks_ok_nGczFFdvDDdbdAl:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %xmm30,%xmm4,%xmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,8,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %xmm20,%xmm4,%xmm4
vextracti32x4 $0,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %xmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %xmm29,%xmm20,%xmm20
vextracti32x4 $0,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (9 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_FEodvvDmqnsbxoz
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 112(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 176(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 240(%r10),%xmm1
.byte 98,243,93,0,68,225,1
.byte 98,243,93,0,68,233,16
.byte 98,243,93,0,68,193,17
.byte 98,243,93,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_FEodvvDmqnsbxoz
.L_small_initial_partial_block_FEodvvDmqnsbxoz:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 128(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 192(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_FEodvvDmqnsbxoz:
orq %r8,%r8
je .L_after_reduction_FEodvvDmqnsbxoz
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_FEodvvDmqnsbxoz:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_10_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $246,%r15d
jae .L_16_blocks_overflow_oulxbBotdhvdFbg
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %ymm27,%ymm3,%ymm4
jmp .L_16_blocks_ok_oulxbBotdhvdFbg
.L_16_blocks_overflow_oulxbBotdhvdFbg:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %ymm29,%ymm4,%ymm4
.L_16_blocks_ok_oulxbBotdhvdFbg:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %ymm30,%ymm4,%ymm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
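// Added note: vpternlogq with imm8 0x96 computes a three-way XOR
// (dst = dst ^ src1 ^ src2), merging pairs of carry-less products in one
// instruction; the surrounding .byte sequences remain hand-encoded EVEX
// vpclmulqdq (0x44) and vaesenc/vaesenclast (0xDC/0xDD).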
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,40,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %ymm20,%ymm4,%ymm4
vextracti32x4 $1,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %ymm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %ymm29,%ymm20,%ymm20
vextracti32x4 $1,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (10 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_mAhDuzfffzBcqnw
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 96(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 160(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 224(%r10),%ymm1
.byte 98,243,93,32,68,225,1
.byte 98,243,93,32,68,233,16
.byte 98,243,93,32,68,193,17
.byte 98,243,93,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_mAhDuzfffzBcqnw
.L_small_initial_partial_block_mAhDuzfffzBcqnw:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 112(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 176(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 240(%r10),%xmm1
.byte 98,243,93,0,68,225,1
.byte 98,243,93,0,68,233,16
.byte 98,243,93,0,68,193,17
.byte 98,243,93,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_mAhDuzfffzBcqnw:
orq %r8,%r8
je .L_after_reduction_mAhDuzfffzBcqnw
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_mAhDuzfffzBcqnw:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_11_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $245,%r15d
jae .L_16_blocks_overflow_nCertFgkfoCxtun
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
jmp .L_16_blocks_ok_nCertFgkfoCxtun
.L_16_blocks_overflow_nCertFgkfoCxtun:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
.L_16_blocks_ok_nCertFgkfoCxtun:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vextracti32x4 $2,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %zmm29,%zmm20,%zmm20
vextracti32x4 $2,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (11 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_dygAbwCGlokBzAu
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 80(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 144(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,93,64,68,225,1
.byte 98,243,93,64,68,233,16
.byte 98,243,93,64,68,193,17
.byte 98,243,93,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_dygAbwCGlokBzAu
.L_small_initial_partial_block_dygAbwCGlokBzAu:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 96(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 160(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 224(%r10),%ymm1
.byte 98,243,93,32,68,225,1
.byte 98,243,93,32,68,233,16
.byte 98,243,93,32,68,193,17
.byte 98,243,93,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_dygAbwCGlokBzAu:
orq %r8,%r8
je .L_after_reduction_dygAbwCGlokBzAu
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_dygAbwCGlokBzAu:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_12_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $244,%r15d
jae .L_16_blocks_overflow_DtwkcFbdCfdcCrh
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
jmp .L_16_blocks_ok_DtwkcFbdCfdcCrh
.L_16_blocks_overflow_DtwkcFbdCfdcCrh:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
.L_16_blocks_ok_DtwkcFbdCfdcCrh:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vextracti32x4 $3,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %zmm29,%zmm20,%zmm20
vextracti32x4 $3,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (12 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_uelgeBErnEDceCF
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 64(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 128(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vmovdqu64 192(%r10),%zmm1
.byte 98,227,93,64,68,201,17
.byte 98,227,93,64,68,217,0
vpternlogq $0x96,%zmm0,%zmm17,%zmm8
vpternlogq $0x96,%zmm3,%zmm19,%zmm22
.byte 98,227,93,64,68,201,1
.byte 98,227,93,64,68,217,16
vpternlogq $0x96,%zmm4,%zmm17,%zmm30
vpternlogq $0x96,%zmm5,%zmm19,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_uelgeBErnEDceCF
.L_small_initial_partial_block_uelgeBErnEDceCF:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 80(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 144(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,93,64,68,225,1
.byte 98,243,93,64,68,233,16
.byte 98,243,93,64,68,193,17
.byte 98,243,93,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_uelgeBErnEDceCF:
orq %r8,%r8
je .L_after_reduction_uelgeBErnEDceCF
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_uelgeBErnEDceCF:
jmp .L_last_blocks_done_kpvFtqCzpagsbmy
.L_last_num_blocks_is_13_kpvFtqCzpagsbmy:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $192,%r11
kmovq (%r10,%r11,8),%k1
cmpl $243,%r15d
jae .L_16_blocks_overflow_ndumifgEEuiqDiF
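// Added note: counter generation for the 13-block tail. cmpl $243,%r15d
// above tests whether the low counter byte would wrap within 13 blocks
// (243 + 13 = 256). The fast path adds the increment constants in place;
// the overflow path byte-swaps with vpshufb, adds ddq_add_1234/ddq_add_4444,
// and swaps back so the carry propagates through the full 32-bit counter.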
vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_ndumifgEEuiqDiF .L_16_blocks_overflow_ndumifgEEuiqDiF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_ndumifgEEuiqDiF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb 
%zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DrrvrAjlkiwmAzx subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DrrvrAjlkiwmAzx .L_small_initial_partial_block_DrrvrAjlkiwmAzx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DrrvrAjlkiwmAzx: orq %r8,%r8 je .L_after_reduction_DrrvrAjlkiwmAzx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DrrvrAjlkiwmAzx: jmp .L_last_blocks_done_kpvFtqCzpagsbmy .L_last_num_blocks_is_14_kpvFtqCzpagsbmy: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_osDGzgifEhqjECm vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd 
%ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_osDGzgifEhqjECm .L_16_blocks_overflow_osDGzgifEhqjECm: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_osDGzgifEhqjECm: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 
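// GHASH tail for the 14-block case: the byte-reflected ciphertext blocks are
// multiplied by the remaining powers of H (the .byte runs appear to be
// EVEX-encoded vpclmulqdq, opcode 0x44, emitted as raw bytes for older
// assemblers), accumulated with vpternlogq $0x96 (three-way XOR), then
// reduced modulo the GHASH polynomial using the POLY2 constant.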
vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fwjCFubGdkywpFz subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fwjCFubGdkywpFz .L_small_initial_partial_block_fwjCFubGdkywpFz: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fwjCFubGdkywpFz: orq %r8,%r8 je .L_after_reduction_fwjCFubGdkywpFz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fwjCFubGdkywpFz: jmp .L_last_blocks_done_kpvFtqCzpagsbmy .L_last_num_blocks_is_15_kpvFtqCzpagsbmy: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae 
.L_16_blocks_overflow_tiCBFudBnEgekda vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_tiCBFudBnEgekda .L_16_blocks_overflow_tiCBFudBnEgekda: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_tiCBFudBnEgekda: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 
%zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_sbduutzwEklCDpB subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_sbduutzwEklCDpB .L_small_initial_partial_block_sbduutzwEklCDpB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_sbduutzwEklCDpB: orq %r8,%r8 je .L_after_reduction_sbduutzwEklCDpB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_sbduutzwEklCDpB: jmp 
.L_last_blocks_done_kpvFtqCzpagsbmy .L_last_num_blocks_is_16_kpvFtqCzpagsbmy: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_ennneCoBjzBsijF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ennneCoBjzBsijF .L_16_blocks_overflow_ennneCoBjzBsijF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ennneCoBjzBsijF: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 
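// 16-block tail: store the encrypted blocks (the final vector under write
// mask %k1 to handle a partial last block), byte-reflect the ciphertext with
// vpshufb, and fold it into the GHASH accumulator kept in %xmm14.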
vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_qbevliloqkkkFsD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qbevliloqkkkFsD: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qbevliloqkkkFsD: jmp .L_last_blocks_done_kpvFtqCzpagsbmy .L_last_num_blocks_is_0_kpvFtqCzpagsbmy: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq 
%xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_kpvFtqCzpagsbmy: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_icBhFhCkojGgnBc .L_message_below_equal_16_blocks_icBhFhCkojGgnBc: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_fAioGdenAmmvupb jl .L_small_initial_num_blocks_is_7_1_fAioGdenAmmvupb cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_fAioGdenAmmvupb jl .L_small_initial_num_blocks_is_11_9_fAioGdenAmmvupb cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_fAioGdenAmmvupb cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_fAioGdenAmmvupb cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_fAioGdenAmmvupb jmp .L_small_initial_num_blocks_is_13_fAioGdenAmmvupb .L_small_initial_num_blocks_is_11_9_fAioGdenAmmvupb: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_fAioGdenAmmvupb cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_fAioGdenAmmvupb jmp .L_small_initial_num_blocks_is_9_fAioGdenAmmvupb .L_small_initial_num_blocks_is_7_1_fAioGdenAmmvupb: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_fAioGdenAmmvupb jl .L_small_initial_num_blocks_is_3_1_fAioGdenAmmvupb cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_fAioGdenAmmvupb cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_fAioGdenAmmvupb jmp .L_small_initial_num_blocks_is_5_fAioGdenAmmvupb .L_small_initial_num_blocks_is_3_1_fAioGdenAmmvupb: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_fAioGdenAmmvupb cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_fAioGdenAmmvupb .L_small_initial_num_blocks_is_1_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONEa(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%rax,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,8,221,199 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_adigDqnunatgwqg subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 
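// Final two steps of the polynomial reduction for the 1-block path: align
// the folded product with vpslldq and vpternlogq-XOR it into the GHASH
// state in %xmm14. The 2-block handler that follows applies the same
// pattern using ymm registers.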
vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_adigDqnunatgwqg .L_small_initial_partial_block_adigDqnunatgwqg: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_adigDqnunatgwqg .L_small_initial_compute_done_adigDqnunatgwqg: .L_after_reduction_adigDqnunatgwqg: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_2_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%rax,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,40,221,199 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wijsfgBfoycrhbf subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wijsfgBfoycrhbf .L_small_initial_partial_block_wijsfgBfoycrhbf: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wijsfgBfoycrhbf: orq %r8,%r8 je .L_after_reduction_wijsfgBfoycrhbf vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_wijsfgBfoycrhbf: jmp 
.L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_3_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ibqkzvjmvrGthss subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ibqkzvjmvrGthss .L_small_initial_partial_block_ibqkzvjmvrGthss: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ibqkzvjmvrGthss: orq %r8,%r8 je .L_after_reduction_ibqkzvjmvrGthss vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ibqkzvjmvrGthss: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_4_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb 
%zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xlbdECcsDitBbrC subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xlbdECcsDitBbrC .L_small_initial_partial_block_xlbdECcsDitBbrC: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xlbdECcsDitBbrC: orq %r8,%r8 je .L_after_reduction_xlbdECcsDitBbrC vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_xlbdECcsDitBbrC: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_5_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 
98,210,101,8,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,8,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AvBuAcGaAAhviww subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AvBuAcGaAAhviww .L_small_initial_partial_block_AvBuAcGaAAhviww: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AvBuAcGaAAhviww: orq %r8,%r8 je .L_after_reduction_AvBuAcGaAAhviww vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_AvBuAcGaAAhviww: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_6_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,40,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dmnwagjDbfGuxqa subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dmnwagjDbfGuxqa .L_small_initial_partial_block_dmnwagjDbfGuxqa: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 
98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dmnwagjDbfGuxqa: orq %r8,%r8 je .L_after_reduction_dmnwagjDbfGuxqa vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_dmnwagjDbfGuxqa: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_7_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FFvlakmlCAfckcF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FFvlakmlCAfckcF .L_small_initial_partial_block_FFvlakmlCAfckcF: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 
.byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FFvlakmlCAfckcF: orq %r8,%r8 je .L_after_reduction_FFvlakmlCAfckcF vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_FFvlakmlCAfckcF: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_8_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_brxnjeBcvFoBFjp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 
vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_brxnjeBcvFoBFjp .L_small_initial_partial_block_brxnjeBcvFoBFjp: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_brxnjeBcvFoBFjp: orq %r8,%r8 je .L_after_reduction_brxnjeBcvFoBFjp vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_brxnjeBcvFoBFjp: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_9_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,8,221,231 vpxorq 
%zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ruEgsxDerxegpsB subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ruEgsxDerxegpsB .L_small_initial_partial_block_ruEgsxDerxegpsB: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ruEgsxDerxegpsB: orq %r8,%r8 je .L_after_reduction_ruEgsxDerxegpsB vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ruEgsxDerxegpsB: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_10_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 
0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,40,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DtsnEBEgqapGgkD subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DtsnEBEgqapGgkD .L_small_initial_partial_block_DtsnEBEgqapGgkD: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 
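// Accumulation and reduction for the 10-block partial path: XOR the high
// and low carry-less partial products together, collapse 512 bits down to
// 128 via vextracti64x4/vextracti32x4 plus vpxorq, then reduce with POLY2
// (the .byte runs here are likely EVEX vpclmulqdq forms as well).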
vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DtsnEBEgqapGgkD: orq %r8,%r8 je .L_after_reduction_DtsnEBEgqapGgkD vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_DtsnEBEgqapGgkD: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_11_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FAlijzFrzEsACFt subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 
98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FAlijzFrzEsACFt .L_small_initial_partial_block_FAlijzFrzEsACFt: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FAlijzFrzEsACFt: orq %r8,%r8 je .L_after_reduction_FAlijzFrzEsACFt vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_FAlijzFrzEsACFt: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_12_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 
98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xdDFiiniApojwBg subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xdDFiiniApojwBg .L_small_initial_partial_block_xdDFiiniApojwBg: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 
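// Every tail handler in this function ends with the same two-step GHASH
// reduction: the 256-bit carry-less product, held as a high half (%xmm0 here)
// and a low half (%xmm3), is reduced modulo the GCM polynomial via the POLY2
// constant defined in this file's data section. A rough C-intrinsics sketch of
// that reduction, kept in a comment so this file stays assemblable
// (illustrative only; ghash_reduce is a made-up name, not part of the
// generated output):
//
//   #include <immintrin.h>  // _mm_clmulepi64_si128 and SSE2 shifts/XOR
//
//   static __m128i ghash_reduce(__m128i hi, __m128i lo, __m128i poly2) {
//       // First fold: hi64(poly2) * lo64(lo), shifted left by 8 bytes.
//       __m128i t = _mm_clmulepi64_si128(poly2, lo, 0x01);
//       lo = _mm_xor_si128(lo, _mm_slli_si128(t, 8));
//       // Second fold: lo64(poly2) * lo64(lo) shifted right by 4 bytes,
//       // and lo64(poly2) * hi64(lo) shifted left by 4 bytes.
//       __m128i a = _mm_srli_si128(_mm_clmulepi64_si128(poly2, lo, 0x00), 4);
//       __m128i b = _mm_slli_si128(_mm_clmulepi64_si128(poly2, lo, 0x10), 4);
//       // vpternlogq $0x96 is a three-way XOR: b ^ a ^ hi.
//       return _mm_xor_si128(_mm_xor_si128(b, a), hi);
//   }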
vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xdDFiiniApojwBg: orq %r8,%r8 je .L_after_reduction_xdDFiiniApojwBg vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_xdDFiiniApojwBg: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_13_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,8,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_nwkAjutBGaaatpl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nwkAjutBGaaatpl .L_small_initial_partial_block_nwkAjutBGaaatpl: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nwkAjutBGaaatpl: orq %r8,%r8 je .L_after_reduction_nwkAjutBGaaatpl vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_nwkAjutBGaaatpl: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_14_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 
0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,40,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ffynzdqrsbdreFk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
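// Second half of the POLY2 reduction: two more carry-less multiplies with
// 4-byte shifts, then a three-way XOR (vpternlogq $0x96) lands the reduced
// GHASH state in %xmm14.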
.byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ffynzdqrsbdreFk .L_small_initial_partial_block_ffynzdqrsbdreFk: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ffynzdqrsbdreFk: orq %r8,%r8 je .L_after_reduction_ffynzdqrsbdreFk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ffynzdqrsbdreFk: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_15_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 
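// AES-128 counter encryption continues: round keys at 112, 128 and 144(%rdi)
// feed vaesenc, and the key at 160(%rdi) feeds the closing vaesenclast
// (the .byte runs ending in 221).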
vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hbmAeclAGCyurof subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hbmAeclAGCyurof .L_small_initial_partial_block_hbmAeclAGCyurof: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq 
%zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hbmAeclAGCyurof: orq %r8,%r8 je .L_after_reduction_hbmAeclAGCyurof vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_hbmAeclAGCyurof: jmp .L_small_initial_blocks_encrypted_fAioGdenAmmvupb .L_small_initial_num_blocks_is_16_fAioGdenAmmvupb: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 
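// 16-block tail: all sixteen ciphertext blocks are hashed against the highest
// hash-key powers and folded into the running GHASH state below.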
.L_small_initial_partial_block_fvqkmnelfBwdflt: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fvqkmnelfBwdflt: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_fvqkmnelfBwdflt: .L_small_initial_blocks_encrypted_fAioGdenAmmvupb: .L_ghash_done_icBhFhCkojGgnBc: vmovdqu64 %xmm2,0(%rsi) .L_enc_dec_done_icBhFhCkojGgnBc: vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_abort_icBhFhCkojGgnBc: jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_192_avx512: orq %r8,%r8 je .L_enc_dec_abort_efvnrtvwAsfehEC xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 movl (%rdx),%eax orq %rax,%rax je .L_partial_block_done_Fvzomuuccfdfevt movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 leaq 80(%rsi),%r10 vmovdqu64 240(%r10),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %rax,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%rax,1),%r13 subq $16,%r13 jge .L_no_extra_mask_Fvzomuuccfdfevt subq %r13,%r12 .L_no_extra_mask_Fvzomuuccfdfevt: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_Fvzomuuccfdfevt .byte 98,243,13,8,68,252,17 .byte 98,115,13,8,68,212,0 .byte 98,115,13,8,68,220,1 .byte 98,115,13,8,68,244,16 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 .byte 98,83,37,8,68,214,1 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 .byte 98,83,37,8,68,214,0 vpsrldq $4,%xmm10,%xmm10 .byte 98,83,37,8,68,246,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movl $0,(%rdx) movq %rax,%r12 movq $16,%rax subq %r12,%rax jmp .L_enc_dec_done_Fvzomuuccfdfevt .L_partial_incomplete_Fvzomuuccfdfevt: addl %r8d,(%rdx) movq %r8,%rax .L_enc_dec_done_Fvzomuuccfdfevt: leaq 
byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%rax,2),%k1 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_Fvzomuuccfdfevt: vmovdqu64 0(%rsi),%xmm2 subq %rax,%r8 je .L_enc_dec_done_efvnrtvwAsfehEC cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_efvnrtvwAsfehEC vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_bmCGDqjpElhfFfq vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_bmCGDqjpElhfFfq .L_next_16_overflow_bmCGDqjpElhfFfq: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_bmCGDqjpElhfFfq: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%rax,1),%zmm0 vmovdqu8 64(%rcx,%rax,1),%zmm3 vmovdqu8 128(%rcx,%rax,1),%zmm4 vmovdqu8 192(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 176(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 192(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%rax,1) vmovdqu8 %zmm10,64(%r10,%rax,1) vmovdqu8 %zmm11,128(%r10,%rax,1) vmovdqu8 %zmm12,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) leaq 80(%rsi),%r12 testq %r14,%r14 jnz 
.L_skip_hkeys_precomputation_mbihlziFEFsDoGE vmovdqu64 192(%r12),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 128(%r12),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 64(%r12),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 0(%r12),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_mbihlziFEFsDoGE: cmpq $512,%r8 jb .L_message_below_32_blocks_efvnrtvwAsfehEC cmpb $240,%r15b jae .L_next_16_overflow_lakxgokamypkjgE vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_lakxgokamypkjgE .L_next_16_overflow_lakxgokamypkjgE: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_lakxgokamypkjgE: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%rax,1),%zmm0 vmovdqu8 320(%rcx,%rax,1),%zmm3 vmovdqu8 384(%rcx,%rax,1),%zmm4 vmovdqu8 448(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 176(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 192(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%rax,1) vmovdqu8 %zmm10,320(%r10,%rax,1) vmovdqu8 %zmm11,384(%r10,%rax,1) vmovdqu8 %zmm12,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_jpElfyvBextCmie vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 
576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq 
$0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_jpElfyvBextCmie: movq $1,%r14 addq $512,%rax subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_efvnrtvwAsfehEC .L_encrypt_big_nblocks_efvnrtvwAsfehEC: cmpb $240,%r15b jae .L_16_blocks_overflow_AlopGldBavsssnG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_AlopGldBavsssnG .L_16_blocks_overflow_AlopGldBavsssnG: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_AlopGldBavsssnG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_gaBCwkclDxgqitC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_gaBCwkclDxgqitC .L_16_blocks_overflow_gaBCwkclDxgqitC: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_gaBCwkclDxgqitC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 
98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_Fxngerofutwuigg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_Fxngerofutwuigg .L_16_blocks_overflow_Fxngerofutwuigg: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_Fxngerofutwuigg: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%rax,1),%zmm17 vmovdqu8 576(%rcx,%rax,1),%zmm19 vmovdqu8 640(%rcx,%rax,1),%zmm20 vmovdqu8 704(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%rax,1) vmovdqu8 %zmm3,576(%r10,%rax,1) vmovdqu8 %zmm4,640(%r10,%rax,1) vmovdqu8 %zmm5,704(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%rax subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_efvnrtvwAsfehEC .L_no_more_big_nblocks_efvnrtvwAsfehEC: cmpq $512,%r8 jae .L_encrypt_32_blocks_efvnrtvwAsfehEC cmpq $256,%r8 jae .L_encrypt_16_blocks_efvnrtvwAsfehEC .L_encrypt_0_blocks_ghash_32_efvnrtvwAsfehEC: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 
vmovdqa64 896(%rsp),%zmm13
vmovdqu64 128(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,196,17
.byte 98,211,21,72,68,220,0
.byte 98,211,21,72,68,228,1
.byte 98,211,21,72,68,236,16
vmovdqa64 960(%rsp),%zmm13
vmovdqu64 192(%rsp,%rbx,1),%zmm12
.byte 98,211,21,72,68,244,17
.byte 98,211,21,72,68,252,0
.byte 98,83,21,72,68,212,1
.byte 98,83,21,72,68,220,16
vpternlogq $0x96,%zmm10,%zmm4,%zmm26
vpternlogq $0x96,%zmm6,%zmm0,%zmm24
vpternlogq $0x96,%zmm7,%zmm3,%zmm25
vpternlogq $0x96,%zmm11,%zmm5,%zmm26
addl $256,%ebx
movl %r8d,%r10d
addl $15,%r10d
shrl $4,%r10d
je .L_last_num_blocks_is_0_xtiyCEhGGvgkorn
cmpl $8,%r10d
je .L_last_num_blocks_is_8_xtiyCEhGGvgkorn
jb .L_last_num_blocks_is_7_1_xtiyCEhGGvgkorn
cmpl $12,%r10d
je .L_last_num_blocks_is_12_xtiyCEhGGvgkorn
jb .L_last_num_blocks_is_11_9_xtiyCEhGGvgkorn
cmpl $15,%r10d
je .L_last_num_blocks_is_15_xtiyCEhGGvgkorn
ja .L_last_num_blocks_is_16_xtiyCEhGGvgkorn
cmpl $14,%r10d
je .L_last_num_blocks_is_14_xtiyCEhGGvgkorn
jmp .L_last_num_blocks_is_13_xtiyCEhGGvgkorn
.L_last_num_blocks_is_11_9_xtiyCEhGGvgkorn:
cmpl $10,%r10d
je .L_last_num_blocks_is_10_xtiyCEhGGvgkorn
ja .L_last_num_blocks_is_11_xtiyCEhGGvgkorn
jmp .L_last_num_blocks_is_9_xtiyCEhGGvgkorn
.L_last_num_blocks_is_7_1_xtiyCEhGGvgkorn:
cmpl $4,%r10d
je .L_last_num_blocks_is_4_xtiyCEhGGvgkorn
jb .L_last_num_blocks_is_3_1_xtiyCEhGGvgkorn
cmpl $6,%r10d
ja .L_last_num_blocks_is_7_xtiyCEhGGvgkorn
je .L_last_num_blocks_is_6_xtiyCEhGGvgkorn
jmp .L_last_num_blocks_is_5_xtiyCEhGGvgkorn
.L_last_num_blocks_is_3_1_xtiyCEhGGvgkorn:
cmpl $2,%r10d
ja .L_last_num_blocks_is_3_xtiyCEhGGvgkorn
je .L_last_num_blocks_is_2_xtiyCEhGGvgkorn
.L_last_num_blocks_is_1_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $255,%r15d
jae .L_16_blocks_overflow_nlADBBgdbvxiiEb
vpaddd %xmm28,%xmm2,%xmm0
jmp .L_16_blocks_ok_nlADBBgdbvxiiEb
.L_16_blocks_overflow_nlADBBgdbvxiiEb:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %xmm29,%xmm0,%xmm0
.L_16_blocks_ok_nlADBBgdbvxiiEb:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %xmm30,%xmm0,%xmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,8,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,8,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,8,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,8,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z}
.byte 98,146,125,8,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,8,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,8,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,8,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,8,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,8,220,199
.byte 98,146,125,8,221,198
vpxorq %xmm17,%xmm0,%xmm0
vextracti32x4 $0,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %xmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %xmm29,%xmm17,%xmm17
vextracti32x4 $0,%zmm17,%xmm7
leaq 80(%rsi),%r10
cmpq $16,%r8
jl .L_small_initial_partial_block_tqujgvqggqpCibu
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_tqujgvqggqpCibu
.L_small_initial_partial_block_tqujgvqggqpCibu:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vpsrldq $8,%zmm26,%zmm0
vpslldq $8,%zmm26,%zmm3
vpxorq %zmm0,%zmm24,%zmm24
vpxorq %zmm3,%zmm25,%zmm25
vextracti64x4 $1,%zmm24,%ymm0
vpxorq %ymm0,%ymm24,%ymm24
vextracti32x4 $1,%ymm24,%xmm0
vpxorq %xmm0,%xmm24,%xmm24
vextracti64x4 $1,%zmm25,%ymm3
vpxorq %ymm3,%ymm25,%ymm25
vextracti32x4 $1,%ymm25,%xmm3
vpxorq %xmm3,%xmm25,%xmm25
vmovdqa64 POLY2(%rip),%xmm0
.byte 98,147,125,8,68,217,1
vpslldq $8,%xmm3,%xmm3
vpxorq %xmm3,%xmm25,%xmm3
.byte 98,243,125,8,68,227,0
vpsrldq $4,%xmm4,%xmm4
.byte 98,115,125,8,68,243,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm24,%xmm4,%xmm14
vpxorq %xmm7,%xmm14,%xmm14
jmp .L_after_reduction_tqujgvqggqpCibu
.L_small_initial_compute_done_tqujgvqggqpCibu:
.L_after_reduction_tqujgvqggqpCibu:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_2_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $254,%r15d
jae .L_16_blocks_overflow_uvnjGlBDyvrfirm
vpaddd %ymm28,%ymm2,%ymm0
jmp .L_16_blocks_ok_uvnjGlBDyvrfirm
.L_16_blocks_overflow_uvnjGlBDyvrfirm:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %ymm29,%ymm0,%ymm0
.L_16_blocks_ok_uvnjGlBDyvrfirm:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %ymm30,%ymm0,%ymm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,40,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,40,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,40,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,40,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte 98,146,125,40,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,40,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,40,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,40,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,40,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,40,220,199
.byte 98,146,125,40,221,198
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %ymm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %ymm29,%ymm17,%ymm17
vextracti32x4 $1,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (2 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_wDeAjiDoocmqspC
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_wDeAjiDoocmqspC
.L_small_initial_partial_block_wDeAjiDoocmqspC:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 240(%r10),%xmm1
.byte 98,243,117,0,68,225,1
.byte 98,243,117,0,68,233,16
.byte 98,243,117,0,68,193,17
.byte 98,243,117,0,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_wDeAjiDoocmqspC:
orq %r8,%r8
je .L_after_reduction_wDeAjiDoocmqspC
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_wDeAjiDoocmqspC:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_3_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $253,%r15d
jae .L_16_blocks_overflow_FgovsDdCfEGrkbF
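// Fast path below: the low counter byte cannot wrap here, so the block
// counters are incremented directly without the byte-swap round trip.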
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_FgovsDdCfEGrkbF
.L_16_blocks_overflow_FgovsDdCfEGrkbF:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_FgovsDdCfEGrkbF:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $2,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vextracti32x4 $2,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (3 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_vofcBkyofakpciE
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_vofcBkyofakpciE
.L_small_initial_partial_block_vofcBkyofakpciE:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_vofcBkyofakpciE:
orq %r8,%r8
je .L_after_reduction_vofcBkyofakpciE
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_vofcBkyofakpciE:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_4_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $252,%r15d
jae .L_16_blocks_overflow_DlimwiDzackronx
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_DlimwiDzackronx
.L_16_blocks_overflow_DlimwiDzackronx:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_DlimwiDzackronx:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $3,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vextracti32x4 $3,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (4 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_umExCeAmGaBqmig
subq $16,%r8
movl $0,(%rdx)
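// %r10 appears to point at the table of precomputed powers of H; each tail
// case selects the slice of powers matching the number of remaining blocks.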
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_umExCeAmGaBqmig
.L_small_initial_partial_block_umExCeAmGaBqmig:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_umExCeAmGaBqmig:
orq %r8,%r8
je .L_after_reduction_umExCeAmGaBqmig
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_umExCeAmGaBqmig:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_5_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $251,%r15d
jae .L_16_blocks_overflow_qGrgsssqhFxDdtg
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %xmm27,%xmm0,%xmm3
jmp .L_16_blocks_ok_qGrgsssqhFxDdtg
.L_16_blocks_overflow_qGrgsssqhFxDdtg:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %xmm29,%xmm3,%xmm3
.L_16_blocks_ok_qGrgsssqhFxDdtg:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %xmm30,%xmm3,%xmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,8,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,8,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %xmm19,%xmm3,%xmm3
vextracti32x4 $0,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %xmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %xmm29,%xmm19,%xmm19
vextracti32x4 $0,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (5 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_DkwztEgqyefkjcA
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_DkwztEgqyefkjcA
.L_small_initial_partial_block_DkwztEgqyefkjcA:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
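// Tail of the POLY2 reduction: the final carry-less multiply and 4-byte
// shifts fold the 256-bit GHASH product into the 128-bit accumulator %xmm14.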
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_DkwztEgqyefkjcA:
orq %r8,%r8
je .L_after_reduction_DkwztEgqyefkjcA
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_DkwztEgqyefkjcA:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_6_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $250,%r15d
jae .L_16_blocks_overflow_ufEGEnqpAFAEymx
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %ymm27,%ymm0,%ymm3
jmp .L_16_blocks_ok_ufEGEnqpAFAEymx
.L_16_blocks_overflow_ufEGEnqpAFAEymx:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %ymm29,%ymm3,%ymm3
.L_16_blocks_ok_ufEGEnqpAFAEymx:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %ymm30,%ymm3,%ymm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,40,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,40,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,40,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %ymm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %ymm29,%ymm19,%ymm19
vextracti32x4 $1,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (6 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_AjqanfyCsBedpsg
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 160(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 224(%r10),%ymm1
.byte 98,243,101,32,68,225,1
.byte 98,243,101,32,68,233,16
.byte 98,243,101,32,68,193,17
.byte 98,243,101,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_AjqanfyCsBedpsg
.L_small_initial_partial_block_AjqanfyCsBedpsg:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 176(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 240(%r10),%xmm1
.byte 98,243,101,0,68,225,1
.byte 98,243,101,0,68,233,16
.byte 98,243,101,0,68,193,17
.byte 98,243,101,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_AjqanfyCsBedpsg:
orq %r8,%r8
je .L_after_reduction_AjqanfyCsBedpsg
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_AjqanfyCsBedpsg:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_7_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $249,%r15d
jae .L_16_blocks_overflow_xgpGrqoEEApwzGE
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
jmp .L_16_blocks_ok_xgpGrqoEEApwzGE
.L_16_blocks_overflow_xgpGrqoEEApwzGE:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
.L_16_blocks_ok_xgpGrqoEEApwzGE:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
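// The vpclmulqdq selectors 0x00, 0x01, 0x10 and 0x11 (the trailing immediate
// in the .byte forms above) generate the four partial products of each
// 128x128-bit carry-less multiplication.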
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti32x4 $2,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vextracti32x4 $2,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (7 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_lrumjmlatrsmlag
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 144(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,101,64,68,225,1
.byte 98,243,101,64,68,233,16
.byte 98,243,101,64,68,193,17
.byte 98,243,101,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_lrumjmlatrsmlag
.L_small_initial_partial_block_lrumjmlatrsmlag:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 160(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 224(%r10),%ymm1
.byte 98,243,101,32,68,225,1
.byte 98,243,101,32,68,233,16
.byte 98,243,101,32,68,193,17
.byte 98,243,101,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_lrumjmlatrsmlag:
orq %r8,%r8
je .L_after_reduction_lrumjmlatrsmlag
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_lrumjmlatrsmlag:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_8_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $248,%r15d
jae .L_16_blocks_overflow_DBafwcnsvcxAbsv
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
jmp .L_16_blocks_ok_DBafwcnsvcxAbsv
.L_16_blocks_overflow_DBafwcnsvcxAbsv:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
.L_16_blocks_ok_DBafwcnsvcxAbsv:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti32x4 $3,%zmm3,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm19,%zmm19{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vextracti32x4 $3,%zmm19,%xmm7
leaq 80(%rsi),%r10
subq $16 * (8 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_vptqcrjpEiCjEDi
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 128(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 192(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_vptqcrjpEiCjEDi
.L_small_initial_partial_block_vptqcrjpEiCjEDi:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 144(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,101,64,68,225,1
.byte 98,243,101,64,68,233,16
.byte 98,243,101,64,68,193,17
.byte 98,243,101,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_vptqcrjpEiCjEDi:
orq %r8,%r8
je .L_after_reduction_vptqcrjpEiCjEDi
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_vptqcrjpEiCjEDi:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_9_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $247,%r15d
jae .L_16_blocks_overflow_muonozkGretEzbg
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %xmm27,%xmm3,%xmm4
jmp .L_16_blocks_ok_muonozkGretEzbg
.L_16_blocks_overflow_muonozkGretEzbg:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %xmm29,%xmm4,%xmm4
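// Nine or more tail blocks spill into a third counter register; only its
// final, possibly partial, 16-byte block is covered by the %k1 mask.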
.L_16_blocks_ok_muonozkGretEzbg:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %xmm30,%xmm4,%xmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,8,220,230
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,8,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,8,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %xmm20,%xmm4,%xmm4
vextracti32x4 $0,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %xmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %xmm29,%xmm20,%xmm20
vextracti32x4 $0,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (9 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_tkpprjhbsieissq
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 112(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 176(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 240(%r10),%xmm1
.byte 98,243,93,0,68,225,1
.byte 98,243,93,0,68,233,16
.byte 98,243,93,0,68,193,17
.byte 98,243,93,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_tkpprjhbsieissq
.L_small_initial_partial_block_tkpprjhbsieissq:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 128(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 192(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_tkpprjhbsieissq:
orq %r8,%r8
je .L_after_reduction_tkpprjhbsieissq
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_tkpprjhbsieissq:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_10_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $246,%r15d
jae .L_16_blocks_overflow_tcxAtedExcFvxwb
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %ymm27,%ymm3,%ymm4
jmp .L_16_blocks_ok_tcxAtedExcFvxwb
.L_16_blocks_overflow_tcxAtedExcFvxwb:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %ymm29,%ymm4,%ymm4
.L_16_blocks_ok_tcxAtedExcFvxwb:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $1,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %ymm30,%ymm4,%ymm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 48(%rdi),%zmm31
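// GHASH of the previous 16 blocks (reloaded from the stack) proceeds in
// parallel with the AES rounds below, one 4-block column at a time.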
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,40,220,230
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,40,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,40,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %ymm20,%ymm4,%ymm4
vextracti32x4 $1,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %ymm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %ymm29,%ymm20,%ymm20
vextracti32x4 $1,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (10 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_jcddyvvAxCAjvqC
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 96(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 160(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 224(%r10),%ymm1
.byte 98,243,93,32,68,225,1
.byte 98,243,93,32,68,233,16
.byte 98,243,93,32,68,193,17
.byte 98,243,93,32,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
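// The horizontal folds above leave the 256-bit product split across
// %xmm0/%xmm3; the POLY2 constant below drives its reduction to 128 bits.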
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_jcddyvvAxCAjvqC
.L_small_initial_partial_block_jcddyvvAxCAjvqC:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 112(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 176(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 240(%r10),%xmm1
.byte 98,243,93,0,68,225,1
.byte 98,243,93,0,68,233,16
.byte 98,243,93,0,68,193,17
.byte 98,243,93,0,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_jcddyvvAxCAjvqC:
orq %r8,%r8
je .L_after_reduction_jcddyvvAxCAjvqC
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_jcddyvvAxCAjvqC:
jmp .L_last_blocks_done_xtiyCEhGGvgkorn
.L_last_num_blocks_is_11_xtiyCEhGGvgkorn:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $128,%r11
kmovq (%r10,%r11,8),%k1
cmpl $245,%r15d
jae .L_16_blocks_overflow_oCyoemhjBbobeot
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %zmm27,%zmm0,%zmm3
vpaddd %zmm27,%zmm3,%zmm4
jmp .L_16_blocks_ok_oCyoemhjBbobeot
.L_16_blocks_overflow_oCyoemhjBbobeot:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpaddd %zmm5,%zmm3,%zmm4
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
.L_16_blocks_ok_oCyoemhjBbobeot:
vbroadcastf64x2 0(%rdi),%zmm30
vmovdqa64 1024(%rsp),%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $2,%zmm4,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 1088(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm30,%zmm3,%zmm3
vpxorq %zmm30,%zmm4,%zmm4
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 1152(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 1216(%rsp),%zmm22
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17
vmovdqu8 64(%rcx,%rax,1),%zmm19
vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z}
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpternlogq $0x96,%zmm12,%zmm14,%zmm24
vpternlogq $0x96,%zmm13,%zmm7,%zmm25
vpternlogq $0x96,%zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
.byte 98,146,101,72,220,222
.byte 98,146,93,72,220,230
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,101,72,220,223
.byte 98,146,93,72,220,231
.byte 98,146,125,72,221,198
.byte 98,146,101,72,221,222
.byte 98,146,93,72,221,230
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vpxorq %zmm20,%zmm4,%zmm4
vextracti32x4 $2,%zmm4,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm20,%zmm20{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vpshufb %zmm29,%zmm19,%zmm19
vpshufb %zmm29,%zmm20,%zmm20
vextracti32x4 $2,%zmm20,%xmm7
leaq 80(%rsi),%r10
subq $16 * (11 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_DusiGqzupzswzGi
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 80(%r10),%zmm1
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
vmovdqu64 144(%r10),%zmm1
.byte 98,115,101,64,68,193,17
.byte 98,227,101,64,68,241,0
.byte 98,99,101,64,68,241,1
.byte 98,99,101,64,68,249,16
vpxorq %zmm8,%zmm0,%zmm8
vpxorq %zmm22,%zmm3,%zmm22
vpxorq %zmm30,%zmm4,%zmm30
vpxorq %zmm31,%zmm5,%zmm31
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,93,64,68,225,1
.byte 98,243,93,64,68,233,16
.byte 98,243,93,64,68,193,17
.byte 98,243,93,64,68,217,0
vpxorq %zmm30,%zmm4,%zmm4
vpternlogq $0x96,%zmm31,%zmm26,%zmm5
vpternlogq $0x96,%zmm8,%zmm24,%zmm0
vpternlogq $0x96,%zmm22,%zmm25,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_DusiGqzupzswzGi
.L_small_initial_partial_block_DusiGqzupzswzGi:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 96(%r10),%zmm1
.byte 98,243,117,64,68,193,17
98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DusiGqzupzswzGi: orq %r8,%r8 je .L_after_reduction_DusiGqzupzswzGi vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DusiGqzupzswzGi: jmp .L_last_blocks_done_xtiyCEhGGvgkorn .L_last_num_blocks_is_12_xtiyCEhGGvgkorn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_rechbAAmkFuppsn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_rechbAAmkFuppsn .L_16_blocks_overflow_rechbAAmkFuppsn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_rechbAAmkFuppsn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lgDrfakaDoGugoh subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lgDrfakaDoGugoh .L_small_initial_partial_block_lgDrfakaDoGugoh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 
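/* One final carry-less multiply follows (the .byte runs are raw EVEX
   encodings of vpclmulqdq, emitted for assembler compatibility); the
   four 128-bit partial products are then folded with vpsrldq/vpslldq
   and reduced modulo the GHASH polynomial via the POLY2 constant. */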
.byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lgDrfakaDoGugoh: orq %r8,%r8 je .L_after_reduction_lgDrfakaDoGugoh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lgDrfakaDoGugoh: jmp .L_last_blocks_done_xtiyCEhGGvgkorn .L_last_num_blocks_is_13_xtiyCEhGGvgkorn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_llFkwrFhuxfvsGD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_llFkwrFhuxfvsGD .L_16_blocks_overflow_llFkwrFhuxfvsGD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_llFkwrFhuxfvsGD: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qwqiEdfkpnfpFcA subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qwqiEdfkpnfpFcA .L_small_initial_partial_block_qwqiEdfkpnfpFcA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 
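/* GHASH over the trailing blocks: each 64-byte group of byte-reflected
   ciphertext is carry-less multiplied (vpclmulqdq, encoded as .byte
   runs) by the matching precomputed power of the hash key loaded from
   the table addressed by %r10. */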
.byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qwqiEdfkpnfpFcA: orq %r8,%r8 je .L_after_reduction_qwqiEdfkpnfpFcA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qwqiEdfkpnfpFcA: jmp .L_last_blocks_done_xtiyCEhGGvgkorn .L_last_num_blocks_is_14_xtiyCEhGGvgkorn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_euGgDuqlvgCFoFG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_euGgDuqlvgCFoFG .L_16_blocks_overflow_euGgDuqlvgCFoFG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_euGgDuqlvgCFoFG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 
192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jAmrCFqArnxiBwr subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jAmrCFqArnxiBwr .L_small_initial_partial_block_jAmrCFqArnxiBwr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 
48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jAmrCFqArnxiBwr: orq %r8,%r8 je .L_after_reduction_jAmrCFqArnxiBwr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jAmrCFqArnxiBwr: jmp .L_last_blocks_done_xtiyCEhGGvgkorn .L_last_num_blocks_is_15_xtiyCEhGGvgkorn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_pFsoEbjdpyaFdzt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_pFsoEbjdpyaFdzt .L_16_blocks_overflow_pFsoEbjdpyaFdzt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_pFsoEbjdpyaFdzt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 
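/* Interleaved AES/GHASH ladder: vbroadcastf64x2 n(%rdi) broadcasts the
   next 16-byte round key to all lanes; the .byte runs whose opcode
   byte is 220 encode vaesenc (221 is vaesenclast) on the counter
   vectors zmm0/zmm3/zmm4/zmm5, while vpclmulqdq products accumulate
   in parallel. */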
vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ynvaqdiwqpExsAh subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ynvaqdiwqpExsAh .L_small_initial_partial_block_ynvaqdiwqpExsAh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ynvaqdiwqpExsAh: orq %r8,%r8 je .L_after_reduction_ynvaqdiwqpExsAh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ynvaqdiwqpExsAh: jmp .L_last_blocks_done_xtiyCEhGGvgkorn .L_last_num_blocks_is_16_xtiyCEhGGvgkorn: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_lxzkkenajCqycbF vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_lxzkkenajCqycbF .L_16_blocks_overflow_lxzkkenajCqycbF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_lxzkkenajCqycbF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_rerkgBbyampldto: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 
98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rerkgBbyampldto: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rerkgBbyampldto: jmp .L_last_blocks_done_xtiyCEhGGvgkorn .L_last_num_blocks_is_0_xtiyCEhGGvgkorn: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_xtiyCEhGGvgkorn: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_efvnrtvwAsfehEC .L_encrypt_32_blocks_efvnrtvwAsfehEC: cmpb $240,%r15b jae .L_16_blocks_overflow_kzaebDdDwylbAcu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_kzaebDdDwylbAcu .L_16_blocks_overflow_kzaebDdDwylbAcu: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_kzaebDdDwylbAcu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 
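/* .L_encrypt_32_blocks path: each of its two 16-block ladders XORs
   round key 0 (broadcast from 0(%rdi)) into all sixteen counter
   blocks (zmm0/zmm3/zmm4/zmm5) before the vaesenc rounds, while the
   previously produced ciphertext saved on the stack (768(%rsp) and
   up) is hashed in parallel. */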
vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_hfufmxvqjkdtxiG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_hfufmxvqjkdtxiG .L_16_blocks_overflow_hfufmxvqjkdtxiG: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_hfufmxvqjkdtxiG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 
%zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%rax movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_nGrjyBwfEzyFAvA cmpl $8,%r10d je .L_last_num_blocks_is_8_nGrjyBwfEzyFAvA jb .L_last_num_blocks_is_7_1_nGrjyBwfEzyFAvA cmpl $12,%r10d je .L_last_num_blocks_is_12_nGrjyBwfEzyFAvA jb .L_last_num_blocks_is_11_9_nGrjyBwfEzyFAvA cmpl $15,%r10d je .L_last_num_blocks_is_15_nGrjyBwfEzyFAvA ja .L_last_num_blocks_is_16_nGrjyBwfEzyFAvA cmpl $14,%r10d je .L_last_num_blocks_is_14_nGrjyBwfEzyFAvA jmp .L_last_num_blocks_is_13_nGrjyBwfEzyFAvA .L_last_num_blocks_is_11_9_nGrjyBwfEzyFAvA: cmpl $10,%r10d je .L_last_num_blocks_is_10_nGrjyBwfEzyFAvA ja .L_last_num_blocks_is_11_nGrjyBwfEzyFAvA jmp .L_last_num_blocks_is_9_nGrjyBwfEzyFAvA .L_last_num_blocks_is_7_1_nGrjyBwfEzyFAvA: cmpl $4,%r10d je .L_last_num_blocks_is_4_nGrjyBwfEzyFAvA jb .L_last_num_blocks_is_3_1_nGrjyBwfEzyFAvA cmpl $6,%r10d ja .L_last_num_blocks_is_7_nGrjyBwfEzyFAvA je .L_last_num_blocks_is_6_nGrjyBwfEzyFAvA jmp .L_last_num_blocks_is_5_nGrjyBwfEzyFAvA .L_last_num_blocks_is_3_1_nGrjyBwfEzyFAvA: cmpl $2,%r10d ja .L_last_num_blocks_is_3_nGrjyBwfEzyFAvA je .L_last_num_blocks_is_2_nGrjyBwfEzyFAvA .L_last_num_blocks_is_1_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_mklqBGCbyBjeEom vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_mklqBGCbyBjeEom .L_16_blocks_overflow_mklqBGCbyBjeEom: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_mklqBGCbyBjeEom: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 
98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_AljAsgffjDBAEDB subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AljAsgffjDBAEDB .L_small_initial_partial_block_AljAsgffjDBAEDB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_AljAsgffjDBAEDB .L_small_initial_compute_done_AljAsgffjDBAEDB: .L_after_reduction_AljAsgffjDBAEDB: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA 
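/* Each remaining .L_last_num_blocks_is_N arm (N = 2..16) repeats the
   same pattern: derive N counter blocks (byte-swapping with vpshufb
   and adding ddq_add_1234/ddq_add_4444 when the low counter byte
   would wrap), encrypt with masked loads/stores keyed off
   byte64_len_to_mask_table via %k1, then fold the new ciphertext into
   the GHASH state. */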
.L_last_num_blocks_is_2_nGrjyBwfEzyFAvA:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r11
	kmovq	(%r10,%r11,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_ADzEaGzEEnztayt
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_ADzEaGzEEnztayt
.L_16_blocks_overflow_ADzEaGzEEnztayt:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_ADzEaGzEEnztayt:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30
.byte	98,115,61,72,68,241,17
.byte	98,243,61,72,68,249,0
.byte	98,115,61,72,68,209,1
.byte	98,115,61,72,68,217,16
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
.byte	98,146,125,40,220,199
	vbroadcastf64x2	48(%rdi),%zmm31
.byte	98,51,77,64,68,250,16
.byte	98,163,77,64,68,194,1
.byte	98,51,77,64,68,226,17
.byte	98,51,77,64,68,234,0
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
.byte	98,146,125,40,220,198
	vbroadcastf64x2	64(%rdi),%zmm30
.byte	98,227,61,72,68,225,16
.byte	98,227,61,72,68,233,1
.byte	98,227,61,72,68,201,17
.byte	98,227,61,72,68,217,0
.byte	98,146,125,40,220,199
	vbroadcastf64x2	80(%rdi),%zmm31
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
.byte	98,146,125,40,220,198
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%rax,1),%ymm17{%k1}{z}
.byte	98,146,125,40,220,199
	vbroadcastf64x2	112(%rdi),%zmm31
.byte	98,51,77,64,68,250,16
.byte	98,163,77,64,68,194,1
.byte	98,51,77,64,68,226,17
.byte	98,51,77,64,68,234,0
.byte	98,146,125,40,220,198
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
.byte	98,146,125,40,220,199
	vbroadcastf64x2	144(%rdi),%zmm31
.byte	98,146,125,40,220,198
	vbroadcastf64x2	160(%rdi),%zmm30
.byte	98,146,125,40,220,199
	vbroadcastf64x2	176(%rdi),%zmm31
.byte	98,146,125,40,220,198
	vbroadcastf64x2	192(%rdi),%zmm30
.byte	98,146,125,40,220,199
.byte	98,146,125,40,221,198
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%rax,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%ymm29,%ymm17,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	leaq	80(%rsi),%r10
	subq	$16 * (2 - 1),%r8
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_awmEjFhlgwizrsw
	subq	$16,%r8
	movl	$0,(%rdx)
	vmovdqu64	224(%r10),%ymm1
.byte	98,243,117,32,68,225,1
.byte	98,243,117,32,68,233,16
.byte	98,243,117,32,68,193,17
.byte	98,243,117,32,68,217,0
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1
.byte	98,243,117,8,68,227,1
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4
.byte	98,243,117,8,68,236,0
	vpsrldq	$4,%xmm5,%xmm5
.byte	98,115,117,8,68,244,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14
	jmp	.L_small_initial_compute_done_awmEjFhlgwizrsw
.L_small_initial_partial_block_awmEjFhlgwizrsw:
	movl	%r8d,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%r10),%xmm1
.byte	98,243,117,0,68,225,1
.byte	98,243,117,0,68,233,16
.byte	98,243,117,0,68,193,17
.byte	98,243,117,0,68,217,0
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1
.byte	98,243,117,8,68,227,1
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4
.byte	98,243,117,8,68,236,0
	vpsrldq	$4,%xmm5,%xmm5
.byte	98,115,117,8,68,244,16
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_awmEjFhlgwizrsw:
	orq	%r8,%r8
	je	.L_after_reduction_awmEjFhlgwizrsw
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_awmEjFhlgwizrsw:
	jmp	.L_last_blocks_done_nGrjyBwfEzyFAvA
.L_last_num_blocks_is_3_nGrjyBwfEzyFAvA:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r11
	kmovq	(%r10,%r11,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_lcaBxDbeGChbeFD
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_lcaBxDbeGChbeFD
.L_16_blocks_overflow_lcaBxDbeGChbeFD:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_lcaBxDbeGChbeFD:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30
.byte	98,115,61,72,68,241,17
.byte	98,243,61,72,68,249,0
.byte	98,115,61,72,68,209,1
.byte	98,115,61,72,68,217,16
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
.byte	98,146,125,72,220,199
	vbroadcastf64x2	48(%rdi),%zmm31
.byte	98,51,77,64,68,250,16
.byte	98,163,77,64,68,194,1
.byte	98,51,77,64,68,226,17
.byte	98,51,77,64,68,234,0
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
.byte	98,146,125,72,220,198
	vbroadcastf64x2	64(%rdi),%zmm30
.byte	98,227,61,72,68,225,16
.byte	98,227,61,72,68,233,1
.byte	98,227,61,72,68,201,17
.byte	98,227,61,72,68,217,0
.byte	98,146,125,72,220,199
	vbroadcastf64x2	80(%rdi),%zmm31
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
.byte	98,146,125,72,220,198
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte	98,146,125,72,220,199
	vbroadcastf64x2	112(%rdi),%zmm31
.byte	98,51,77,64,68,250,16
.byte	98,163,77,64,68,194,1
.byte	98,51,77,64,68,226,17
.byte	98,51,77,64,68,234,0
.byte	98,146,125,72,220,198
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
.byte	98,146,125,72,220,199
	vbroadcastf64x2	144(%rdi),%zmm31
.byte	98,146,125,72,220,198
	vbroadcastf64x2	160(%rdi),%zmm30
.byte	98,146,125,72,220,199
	vbroadcastf64x2	176(%rdi),%zmm31
.byte	98,146,125,72,220,198
	vbroadcastf64x2	192(%rdi),%zmm30
.byte	98,146,125,72,220,199
.byte	98,146,125,72,221,198
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%rax,1){%k1}
	vmovdqu8	%zmm17,%zmm17{%k1}{z}
	vpshufb	%zmm29,%zmm17,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	leaq	80(%rsi),%r10
	subq	$16 * (3 - 1),%r8
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_mBDBtxmxpwzmxwj
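/* Whole final block consumed: the stored partial-block byte count at
   (%rdx) is cleared before the final multiply/reduce; the partial
   path above instead records the remaining byte count at (%rdx) and
   saves the last block at 16(%rsi). */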
subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mBDBtxmxpwzmxwj .L_small_initial_partial_block_mBDBtxmxpwzmxwj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mBDBtxmxpwzmxwj: orq %r8,%r8 je .L_after_reduction_mBDBtxmxpwzmxwj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mBDBtxmxpwzmxwj: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_4_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_pawpbdkivckxDwC vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_pawpbdkivckxDwC .L_16_blocks_overflow_pawpbdkivckxDwC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_pawpbdkivckxDwC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 
98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wubpbikcrdlgswu subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wubpbikcrdlgswu .L_small_initial_partial_block_wubpbikcrdlgswu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wubpbikcrdlgswu: orq %r8,%r8 je .L_after_reduction_wubpbikcrdlgswu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wubpbikcrdlgswu: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_5_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_DaxgvFmGcDpdBDr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_DaxgvFmGcDpdBDr .L_16_blocks_overflow_DaxgvFmGcDpdBDr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_DaxgvFmGcDpdBDr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wfpxmlzpEjGxgfg subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wfpxmlzpEjGxgfg .L_small_initial_partial_block_wfpxmlzpEjGxgfg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wfpxmlzpEjGxgfg: orq %r8,%r8 je .L_after_reduction_wfpxmlzpEjGxgfg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wfpxmlzpEjGxgfg: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_6_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_GCBuEfGizfDEkbf vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_GCBuEfGizfDEkbf .L_16_blocks_overflow_GCBuEfGizfDEkbf: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_GCBuEfGizfDEkbf: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_bsGacaiacduekkh subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_bsGacaiacduekkh .L_small_initial_partial_block_bsGacaiacduekkh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_bsGacaiacduekkh: orq %r8,%r8 je .L_after_reduction_bsGacaiacduekkh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_bsGacaiacduekkh: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_7_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_sxxwCglaApctqvC 
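// 7-block tail. Fast path: the counter's low byte cannot wrap here (the
// cmpl above checks it against 249 = 256 - 7), so the next counter blocks
// come from direct vpaddd on the big-endian counters; the overflow path
// byte-swaps to little-endian, adds ddq_add_1234/ddq_add_4444, and swaps
// back instead.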
vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_sxxwCglaApctqvC .L_16_blocks_overflow_sxxwCglaApctqvC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_sxxwCglaApctqvC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yiCblticDBdDvqz subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 
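// Horizontal reduction: XOR the four 128-bit lanes of the folded products
// down to a single 128-bit remainder, then reduce it modulo the GHASH
// polynomial using the POLY2 constant (the .byte sequences below are
// vpclmulqdq).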
vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yiCblticDBdDvqz .L_small_initial_partial_block_yiCblticDBdDvqz: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yiCblticDBdDvqz: orq %r8,%r8 je .L_after_reduction_yiCblticDBdDvqz vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yiCblticDBdDvqz: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_8_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_CnnuddjEBnFGdsj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_CnnuddjEBnFGdsj .L_16_blocks_overflow_CnnuddjEBnFGdsj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_CnnuddjEBnFGdsj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
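// The vpternlogq 0x96 (three-input XOR) ops interleaved with the AES
// rounds fold the GHASH partial products of the preceding 16-block chunk
// (kept on the stack) into the running accumulators, hiding their latency
// behind the vaesenc chain.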
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ozBrEzEFaraubuw subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ozBrEzEFaraubuw .L_small_initial_partial_block_ozBrEzEFaraubuw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 
98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ozBrEzEFaraubuw: orq %r8,%r8 je .L_after_reduction_ozBrEzEFaraubuw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ozBrEzEFaraubuw: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_9_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_jwawBbqsGrnbEEd vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_jwawBbqsGrnbEEd .L_16_blocks_overflow_jwawBbqsGrnbEEd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_jwawBbqsGrnbEEd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 
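// Final AES round (vaesenclast, byte-encoded) for all three counter
// groups; the resulting keystream is XORed with the loaded input, the
// tail block is masked through %k1, and the input blocks are
// byte-reflected (vpshufb) for the GHASH update that follows.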
.byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FtmdhyAthqlklcF subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FtmdhyAthqlklcF .L_small_initial_partial_block_FtmdhyAthqlklcF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FtmdhyAthqlklcF: orq %r8,%r8 je .L_after_reduction_FtmdhyAthqlklcF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FtmdhyAthqlklcF: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_10_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_bEhtipvqjwytqAA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp 
.L_16_blocks_ok_bEhtipvqjwytqAA .L_16_blocks_overflow_bEhtipvqjwytqAA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_bEhtipvqjwytqAA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dGfczcdzdkvubwf subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 
98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dGfczcdzdkvubwf .L_small_initial_partial_block_dGfczcdzdkvubwf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dGfczcdzdkvubwf: orq %r8,%r8 je .L_after_reduction_dGfczcdzdkvubwf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dGfczcdzdkvubwf: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_11_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_peywgEttBymhlkG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_peywgEttBymhlkG .L_16_blocks_overflow_peywgEttBymhlkG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_peywgEttBymhlkG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 
16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kGoguFjBsnAyegA subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 
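// Merge the just-computed products of the tail blocks with the previous
// chunk's GHASH contribution (%zmm24/%zmm25/%zmm26) via three-input XORs,
// then finish the lane fold and reduce through POLY2.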
vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kGoguFjBsnAyegA .L_small_initial_partial_block_kGoguFjBsnAyegA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kGoguFjBsnAyegA: orq %r8,%r8 je .L_after_reduction_kGoguFjBsnAyegA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kGoguFjBsnAyegA: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_12_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_pfftEtegsrsinbs vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_pfftEtegsrsinbs .L_16_blocks_overflow_pfftEtegsrsinbs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_pfftEtegsrsinbs: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 
48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_epFvAomFdDAhsfr subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 
98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_epFvAomFdDAhsfr .L_small_initial_partial_block_epFvAomFdDAhsfr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_epFvAomFdDAhsfr: orq %r8,%r8 je .L_after_reduction_epFvAomFdDAhsfr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_epFvAomFdDAhsfr: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_13_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_hoEpuvlFtAdDDCj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_hoEpuvlFtAdDDCj .L_16_blocks_overflow_hoEpuvlFtAdDDCj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_hoEpuvlFtAdDDCj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 
98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lwaCfdsabqxsDae subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 
$1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lwaCfdsabqxsDae .L_small_initial_partial_block_lwaCfdsabqxsDae: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lwaCfdsabqxsDae: orq %r8,%r8 je .L_after_reduction_lwaCfdsabqxsDae vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_lwaCfdsabqxsDae: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_14_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_kDibsGzbehdlyln vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_kDibsGzbehdlyln .L_16_blocks_overflow_kDibsGzbehdlyln: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_kDibsGzbehdlyln: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 
192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aygEgEvDgGbktBd subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 
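// vpternlogq with immediate 0x96 is a three-way XOR; these folds merge the
// high/low carry-less products of every lane into one accumulator pair
// ahead of the polynomial reduction that follows.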
vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aygEgEvDgGbktBd .L_small_initial_partial_block_aygEgEvDgGbktBd: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aygEgEvDgGbktBd: orq %r8,%r8 je .L_after_reduction_aygEgEvDgGbktBd vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aygEgEvDgGbktBd: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_15_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_vejCgbGykbnkAnl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vejCgbGykbnkAnl .L_16_blocks_overflow_vejCgbGykbnkAnl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vejCgbGykbnkAnl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 
98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AeaGuuDepzdAfkw subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 
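// The .byte sequences are EVEX-encoded vpclmulqdq (opcode 0x44), vaesenc
// (0xdc) and vaesenclast (0xdd), presumably emitted as raw bytes so the
// file still assembles on toolchains whose assemblers lack VPCLMULQDQ/VAES.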
vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AeaGuuDepzdAfkw .L_small_initial_partial_block_AeaGuuDepzdAfkw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AeaGuuDepzdAfkw: orq %r8,%r8 je .L_after_reduction_AeaGuuDepzdAfkw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AeaGuuDepzdAfkw: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_16_nGrjyBwfEzyFAvA: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_oEmrkvwdwsmBgef vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_oEmrkvwdwsmBgef .L_16_blocks_overflow_oEmrkvwdwsmBgef: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 
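// The two counter-block paths rejoin below. The overflow path above works
// in big-endian form (byte-swap, add ddq_add_1234/ddq_add_4444, swap back)
// so the increment carries correctly past the low byte of the counter.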
.L_16_blocks_ok_oEmrkvwdwsmBgef: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 
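// A 16-block tail has no separate full-block path: %r8 now holds the byte
// length of the final block and control falls straight through to the
// partial-block GHASH below.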
.L_small_initial_partial_block_emEtFnwcsvbsGee: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_emEtFnwcsvbsGee: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_emEtFnwcsvbsGee: jmp .L_last_blocks_done_nGrjyBwfEzyFAvA .L_last_num_blocks_is_0_nGrjyBwfEzyFAvA: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_nGrjyBwfEzyFAvA: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_efvnrtvwAsfehEC .L_encrypt_16_blocks_efvnrtvwAsfehEC: cmpb $240,%r15b jae .L_16_blocks_overflow_evgrutpeAjmaukd vpaddd %zmm28,%zmm2,%zmm0 vpaddd 
%zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_evgrutpeAjmaukd .L_16_blocks_overflow_evgrutpeAjmaukd: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_evgrutpeAjmaukd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 
%zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%rax movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_FBaFgdErDhzlksr cmpl $8,%r10d je .L_last_num_blocks_is_8_FBaFgdErDhzlksr jb .L_last_num_blocks_is_7_1_FBaFgdErDhzlksr cmpl $12,%r10d je .L_last_num_blocks_is_12_FBaFgdErDhzlksr jb .L_last_num_blocks_is_11_9_FBaFgdErDhzlksr cmpl $15,%r10d je .L_last_num_blocks_is_15_FBaFgdErDhzlksr ja .L_last_num_blocks_is_16_FBaFgdErDhzlksr cmpl $14,%r10d je .L_last_num_blocks_is_14_FBaFgdErDhzlksr jmp .L_last_num_blocks_is_13_FBaFgdErDhzlksr .L_last_num_blocks_is_11_9_FBaFgdErDhzlksr: cmpl $10,%r10d je .L_last_num_blocks_is_10_FBaFgdErDhzlksr ja .L_last_num_blocks_is_11_FBaFgdErDhzlksr jmp .L_last_num_blocks_is_9_FBaFgdErDhzlksr .L_last_num_blocks_is_7_1_FBaFgdErDhzlksr: cmpl $4,%r10d je .L_last_num_blocks_is_4_FBaFgdErDhzlksr jb .L_last_num_blocks_is_3_1_FBaFgdErDhzlksr cmpl $6,%r10d ja .L_last_num_blocks_is_7_FBaFgdErDhzlksr je .L_last_num_blocks_is_6_FBaFgdErDhzlksr jmp .L_last_num_blocks_is_5_FBaFgdErDhzlksr .L_last_num_blocks_is_3_1_FBaFgdErDhzlksr: cmpl $2,%r10d ja .L_last_num_blocks_is_3_FBaFgdErDhzlksr je .L_last_num_blocks_is_2_FBaFgdErDhzlksr .L_last_num_blocks_is_1_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_EztzACczExrozqe vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_EztzACczExrozqe .L_16_blocks_overflow_EztzACczExrozqe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_EztzACczExrozqe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 
98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,8,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_CCCssCzirDpGCgu subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_CCCssCzirDpGCgu .L_small_initial_partial_block_CCCssCzirDpGCgu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_CCCssCzirDpGCgu .L_small_initial_compute_done_CCCssCzirDpGCgu: .L_after_reduction_CCCssCzirDpGCgu: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_2_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_ddpheeylmysesqA vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_ddpheeylmysesqA .L_16_blocks_overflow_ddpheeylmysesqA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_ddpheeylmysesqA: vbroadcastf64x2 
0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,40,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kEwhkniEotxddri subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_kEwhkniEotxddri .L_small_initial_partial_block_kEwhkniEotxddri: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kEwhkniEotxddri: orq %r8,%r8 je .L_after_reduction_kEwhkniEotxddri vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kEwhkniEotxddri: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_3_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_vAzgdsEEohhszlv vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_vAzgdsEEohhszlv .L_16_blocks_overflow_vAzgdsEEohhszlv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_vAzgdsEEohhszlv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq 
%xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vgpvCquElabkfFm subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vgpvCquElabkfFm .L_small_initial_partial_block_vgpvCquElabkfFm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vgpvCquElabkfFm: orq %r8,%r8 je .L_after_reduction_vgpvCquElabkfFm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vgpvCquElabkfFm: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_4_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_ciiDnbwsdfFhyEA vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_ciiDnbwsdfFhyEA .L_16_blocks_overflow_ciiDnbwsdfFhyEA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_ciiDnbwsdfFhyEA: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 
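// AES rounds on the counter blocks are stitched with vpclmulqdq multiplies
// of what appear to be the previous 16 ciphertext blocks (buffered from
// 1280(%rsp)) by precomputed hash-key powers (from 512(%rsp)), overlapping
// the two dependency chains.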
.byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BmnofkldoqxnfuE subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BmnofkldoqxnfuE .L_small_initial_partial_block_BmnofkldoqxnfuE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
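// Horizontal XOR-fold of the GHASH accumulators, 512 -> 256 -> 128 bits,
// leaving a single 128-bit product pair for the POLY2 reduction.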
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BmnofkldoqxnfuE: orq %r8,%r8 je .L_after_reduction_BmnofkldoqxnfuE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BmnofkldoqxnfuE: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_5_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_AGvFmhBetCxAviv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_AGvFmhBetCxAviv .L_16_blocks_overflow_AGvFmhBetCxAviv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_AGvFmhBetCxAviv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 
98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tnjvAdygufmEFFh subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tnjvAdygufmEFFh .L_small_initial_partial_block_tnjvAdygufmEFFh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tnjvAdygufmEFFh: orq %r8,%r8 je .L_after_reduction_tnjvAdygufmEFFh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tnjvAdygufmEFFh: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_6_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_sjympigbCCDhsDn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_sjympigbCCDhsDn .L_16_blocks_overflow_sjympigbCCDhsDn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_sjympigbCCDhsDn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 
512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cnyvDpbBAuzhoGm subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq 
%zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cnyvDpbBAuzhoGm .L_small_initial_partial_block_cnyvDpbBAuzhoGm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cnyvDpbBAuzhoGm: orq %r8,%r8 je .L_after_reduction_cnyvDpbBAuzhoGm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cnyvDpbBAuzhoGm: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_7_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_puBiejaewnoDvka vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_puBiejaewnoDvka .L_16_blocks_overflow_puBiejaewnoDvka: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_puBiejaewnoDvka: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kEkkBlBkynveErA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kEkkBlBkynveErA .L_small_initial_partial_block_kEkkBlBkynveErA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 
98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kEkkBlBkynveErA: orq %r8,%r8 je .L_after_reduction_kEkkBlBkynveErA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kEkkBlBkynveErA: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_8_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_eaeCeiduedGDdDq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_eaeCeiduedGDdDq .L_16_blocks_overflow_eaeCeiduedGDdDq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_eaeCeiduedGDdDq: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qsuohqatcFrqreB subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qsuohqatcFrqreB .L_small_initial_partial_block_qsuohqatcFrqreB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qsuohqatcFrqreB: orq 
%r8,%r8 je .L_after_reduction_qsuohqatcFrqreB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qsuohqatcFrqreB: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_9_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_zgrBucdeiivwwje vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_zgrBucdeiivwwje .L_16_blocks_overflow_zgrBucdeiivwwje: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_zgrBucdeiivwwje: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BobokvzEgBCGCux subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BobokvzEgBCGCux .L_small_initial_partial_block_BobokvzEgBCGCux: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BobokvzEgBCGCux: orq %r8,%r8 je .L_after_reduction_BobokvzEgBCGCux vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BobokvzEgBCGCux: jmp .L_last_blocks_done_FBaFgdErDhzlksr 
.L_last_num_blocks_is_10_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_rqjyEzzCiBijwho vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_rqjyEzzCiBijwho .L_16_blocks_overflow_rqjyEzzCiBijwho: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_rqjyEzzCiBijwho: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gptBtCibyiDhlou subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gptBtCibyiDhlou .L_small_initial_partial_block_gptBtCibyiDhlou: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gptBtCibyiDhlou: orq %r8,%r8 je .L_after_reduction_gptBtCibyiDhlou vpxorq %xmm7,%xmm14,%xmm14 
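// At this point %xmm14 carries the reduced GHASH accumulator for this tail.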
.L_after_reduction_gptBtCibyiDhlou: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_11_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_udirAnChEpiDCdb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_udirAnChEpiDCdb .L_16_blocks_overflow_udirAnChEpiDCdb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_udirAnChEpiDCdb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EuymoBDpuhDzkkw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EuymoBDpuhDzkkw .L_small_initial_partial_block_EuymoBDpuhDzkkw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EuymoBDpuhDzkkw: orq %r8,%r8 je .L_after_reduction_EuymoBDpuhDzkkw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EuymoBDpuhDzkkw: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_12_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_nCrveguADGnpgFu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_nCrveguADGnpgFu .L_16_blocks_overflow_nCrveguADGnpgFu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_nCrveguADGnpgFu: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 
98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EbClbforwjDGhdq subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EbClbforwjDGhdq .L_small_initial_partial_block_EbClbforwjDGhdq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EbClbforwjDGhdq: orq %r8,%r8 je .L_after_reduction_EbClbforwjDGhdq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EbClbforwjDGhdq: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_13_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_FuAeDsuGfAcCbnh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_FuAeDsuGfAcCbnh .L_16_blocks_overflow_FuAeDsuGfAcCbnh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_FuAeDsuGfAcCbnh: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 
vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GdeeilznaFbDlhh subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GdeeilznaFbDlhh .L_small_initial_partial_block_GdeeilznaFbDlhh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 
98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GdeeilznaFbDlhh: orq %r8,%r8 je .L_after_reduction_GdeeilznaFbDlhh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GdeeilznaFbDlhh: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_14_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_FvEhyckDsphilDy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_FvEhyckDsphilDy .L_16_blocks_overflow_FvEhyckDsphilDy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_FvEhyckDsphilDy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 
.byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fkoDbsekulkxCkw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fkoDbsekulkxCkw .L_small_initial_partial_block_fkoDbsekulkxCkw: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fkoDbsekulkxCkw: orq %r8,%r8 je .L_after_reduction_fkoDbsekulkxCkw vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fkoDbsekulkxCkw: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_15_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_lpConoqwylkjlwn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_lpConoqwylkjlwn .L_16_blocks_overflow_lpConoqwylkjlwn: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_lpConoqwylkjlwn: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DbmjnDvmvfAywny subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 
98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DbmjnDvmvfAywny .L_small_initial_partial_block_DbmjnDvmvfAywny: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DbmjnDvmvfAywny: orq %r8,%r8 je .L_after_reduction_DbmjnDvmvfAywny vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_DbmjnDvmvfAywny: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_16_FBaFgdErDhzlksr: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_vhaFwxkrByAhtie vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vhaFwxkrByAhtie .L_16_blocks_overflow_vhaFwxkrByAhtie: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd 
%zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vhaFwxkrByAhtie: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 
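// The .byte sequences around this point appear to be EVEX-encoded
// vpclmulqdq forms multiplying by POLY2 (held in %xmm16): the second
// phase of the GHASH reduction, which folds the high half of the
// product into %xmm14 via the three-way XOR vpternlogq $0x96.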
vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_ciyykzjryphtjAc: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ciyykzjryphtjAc: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ciyykzjryphtjAc: jmp .L_last_blocks_done_FBaFgdErDhzlksr .L_last_num_blocks_is_0_FBaFgdErDhzlksr: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 
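// Horizontal fold of the 512-bit GHASH accumulators: XOR the upper
// 256-bit half into the lower half, then the upper 128-bit quarter
// into the low quarter, leaving one 128-bit value per accumulator
// before the final reduction modulo the GCM polynomial (POLY2).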
vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_FBaFgdErDhzlksr: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_efvnrtvwAsfehEC .L_message_below_32_blocks_efvnrtvwAsfehEC: subq $256,%r8 addq $256,%rax movl %r8d,%r10d leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_yBFabBiEpjEBBsr vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_yBFabBiEpjEBBsr: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_cuuhesezwjvjmyk cmpl $8,%r10d je .L_last_num_blocks_is_8_cuuhesezwjvjmyk jb .L_last_num_blocks_is_7_1_cuuhesezwjvjmyk cmpl $12,%r10d je .L_last_num_blocks_is_12_cuuhesezwjvjmyk jb .L_last_num_blocks_is_11_9_cuuhesezwjvjmyk cmpl $15,%r10d je .L_last_num_blocks_is_15_cuuhesezwjvjmyk ja .L_last_num_blocks_is_16_cuuhesezwjvjmyk cmpl $14,%r10d je .L_last_num_blocks_is_14_cuuhesezwjvjmyk jmp .L_last_num_blocks_is_13_cuuhesezwjvjmyk .L_last_num_blocks_is_11_9_cuuhesezwjvjmyk: cmpl $10,%r10d je 
.L_last_num_blocks_is_10_cuuhesezwjvjmyk ja .L_last_num_blocks_is_11_cuuhesezwjvjmyk jmp .L_last_num_blocks_is_9_cuuhesezwjvjmyk .L_last_num_blocks_is_7_1_cuuhesezwjvjmyk: cmpl $4,%r10d je .L_last_num_blocks_is_4_cuuhesezwjvjmyk jb .L_last_num_blocks_is_3_1_cuuhesezwjvjmyk cmpl $6,%r10d ja .L_last_num_blocks_is_7_cuuhesezwjvjmyk je .L_last_num_blocks_is_6_cuuhesezwjvjmyk jmp .L_last_num_blocks_is_5_cuuhesezwjvjmyk .L_last_num_blocks_is_3_1_cuuhesezwjvjmyk: cmpl $2,%r10d ja .L_last_num_blocks_is_3_cuuhesezwjvjmyk je .L_last_num_blocks_is_2_cuuhesezwjvjmyk .L_last_num_blocks_is_1_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_yqjovttCDEvpyyd vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_yqjovttCDEvpyyd .L_16_blocks_overflow_yqjovttCDEvpyyd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_yqjovttCDEvpyyd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_vEAkobbEjFEfDjE subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vEAkobbEjFEfDjE .L_small_initial_partial_block_vEAkobbEjFEfDjE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_vEAkobbEjFEfDjE .L_small_initial_compute_done_vEAkobbEjFEfDjE: .L_after_reduction_vEAkobbEjFEfDjE: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_2_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_dunlemEBzoyBoxa vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_dunlemEBzoyBoxa .L_16_blocks_overflow_dunlemEBzoyBoxa: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_dunlemEBzoyBoxa: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq 
%ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jwqibvpanppwwkg subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jwqibvpanppwwkg .L_small_initial_partial_block_jwqibvpanppwwkg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jwqibvpanppwwkg: orq %r8,%r8 je .L_after_reduction_jwqibvpanppwwkg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_jwqibvpanppwwkg: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_3_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_gknxnDbcehnficG vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_gknxnDbcehnficG .L_16_blocks_overflow_gknxnDbcehnficG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_gknxnDbcehnficG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 
98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yrqnxcGbhfxbzua subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yrqnxcGbhfxbzua .L_small_initial_partial_block_yrqnxcGbhfxbzua: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yrqnxcGbhfxbzua: orq %r8,%r8 je .L_after_reduction_yrqnxcGbhfxbzua vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yrqnxcGbhfxbzua: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_4_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_vkChDblsuoFkgEp vpaddd %zmm28,%zmm2,%zmm0 jmp 
.L_16_blocks_ok_vkChDblsuoFkgEp .L_16_blocks_overflow_vkChDblsuoFkgEp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_vkChDblsuoFkgEp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kqcfotnkDdwFCle subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kqcfotnkDdwFCle .L_small_initial_partial_block_kqcfotnkDdwFCle: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq 
%zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kqcfotnkDdwFCle: orq %r8,%r8 je .L_after_reduction_kqcfotnkDdwFCle vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kqcfotnkDdwFCle: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_5_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_aGCpdetktlAtivE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_aGCpdetktlAtivE .L_16_blocks_overflow_aGCpdetktlAtivE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_aGCpdetktlAtivE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 
98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BhyxbheFwtzAGqD subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BhyxbheFwtzAGqD .L_small_initial_partial_block_BhyxbheFwtzAGqD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BhyxbheFwtzAGqD: orq %r8,%r8 je .L_after_reduction_BhyxbheFwtzAGqD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BhyxbheFwtzAGqD: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_6_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_DlEhcmhmAqggthl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_DlEhcmhmAqggthl .L_16_blocks_overflow_DlEhcmhmAqggthl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_DlEhcmhmAqggthl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 
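// On the raw .byte runs in these tail paths: a 68 (0x44) opcode byte
// encodes vpclmulqdq (the GHASH carry-less multiplies) and 220/221
// (0xDC/0xDD) encode vaesenc/vaesenclast (the interleaved AES-CTR
// rounds); they seem to be emitted as bytes rather than mnemonics so
// that older assemblers can still build the file.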
.byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ykkpmhjniEvyltu subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ykkpmhjniEvyltu .L_small_initial_partial_block_ykkpmhjniEvyltu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 
98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ykkpmhjniEvyltu: orq %r8,%r8 je .L_after_reduction_ykkpmhjniEvyltu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ykkpmhjniEvyltu: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_7_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_szxcAmcFcFxFikD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_szxcAmcFcFxFikD .L_16_blocks_overflow_szxcAmcFcFxFikD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_szxcAmcFcFxFikD: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 
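// Each vbroadcastf64x2 off(%rdi) broadcasts the next 128-bit AES round
// key to all lanes; the counter blocks in %zmm0/%zmm3 are carried
// through the byte-encoded vaesenc rounds while the GHASH
// multiply-accumulate over the previous ciphertext runs in parallel.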
vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BsvCgmoprgDppla subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BsvCgmoprgDppla .L_small_initial_partial_block_BsvCgmoprgDppla: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BsvCgmoprgDppla: orq %r8,%r8 je .L_after_reduction_BsvCgmoprgDppla vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BsvCgmoprgDppla: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_8_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_nGgmonbofwfdiqp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_nGgmonbofwfdiqp .L_16_blocks_overflow_nGgmonbofwfdiqp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_nGgmonbofwfdiqp: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qxxbtfdlDzEAenB subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qxxbtfdlDzEAenB .L_small_initial_partial_block_qxxbtfdlDzEAenB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qxxbtfdlDzEAenB: orq %r8,%r8 je .L_after_reduction_qxxbtfdlDzEAenB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qxxbtfdlDzEAenB: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_9_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_isErwnbzGxuwnib vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_isErwnbzGxuwnib .L_16_blocks_overflow_isErwnbzGxuwnib: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_isErwnbzGxuwnib: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ldosriajsdgdtty subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ldosriajsdgdtty .L_small_initial_partial_block_ldosriajsdgdtty: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 
98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ldosriajsdgdtty: orq %r8,%r8 je .L_after_reduction_ldosriajsdgdtty vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ldosriajsdgdtty: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_10_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_ylkmjtxhbazdAht vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_ylkmjtxhbazdAht .L_16_blocks_overflow_ylkmjtxhbazdAht: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_ylkmjtxhbazdAht: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cogjdCgsFwwACAv subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cogjdCgsFwwACAv .L_small_initial_partial_block_cogjdCgsFwwACAv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
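// Fold the second 512-bit accumulator the same way, then reduce the
// combined result modulo the GHASH polynomial (POLY2).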
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cogjdCgsFwwACAv: orq %r8,%r8 je .L_after_reduction_cogjdCgsFwwACAv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cogjdCgsFwwACAv: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_11_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_amFqbyqnsgkbEyu vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_amFqbyqnsgkbEyu .L_16_blocks_overflow_amFqbyqnsgkbEyu: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_amFqbyqnsgkbEyu: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_duCGbqEavktkktr subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_duCGbqEavktkktr .L_small_initial_partial_block_duCGbqEavktkktr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_duCGbqEavktkktr: orq %r8,%r8 je .L_after_reduction_duCGbqEavktkktr vpxorq 
%xmm7,%xmm14,%xmm14 .L_after_reduction_duCGbqEavktkktr: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_12_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_Gxdljjoscahpipo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_Gxdljjoscahpipo .L_16_blocks_overflow_Gxdljjoscahpipo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_Gxdljjoscahpipo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} 
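// Re-apply the k1 mask with zeroing so bytes past the end of the message
// are cleared before the block is byte-reflected for GHASH.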
vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EbrtvvbpfhnmgEG subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EbrtvvbpfhnmgEG .L_small_initial_partial_block_EbrtvvbpfhnmgEG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EbrtvvbpfhnmgEG: orq %r8,%r8 je .L_after_reduction_EbrtvvbpfhnmgEG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EbrtvvbpfhnmgEG: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_13_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_BzbwlusABaejjjy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_BzbwlusABaejjjy .L_16_blocks_overflow_BzbwlusABaejjjy: vpshufb 
%zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_BzbwlusABaejjjy: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 
%xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_axssylktqnfAEEo subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_axssylktqnfAEEo .L_small_initial_partial_block_axssylktqnfAEEo: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_axssylktqnfAEEo: orq %r8,%r8 je .L_after_reduction_axssylktqnfAEEo vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_axssylktqnfAEEo: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_14_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae 
.L_16_blocks_overflow_wfxluBeiqgADmFb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_wfxluBeiqgADmFb .L_16_blocks_overflow_wfxluBeiqgADmFb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_wfxluBeiqgADmFb: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_goygycijAEpsvvt subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_goygycijAEpsvvt .L_small_initial_partial_block_goygycijAEpsvvt: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_goygycijAEpsvvt: orq %r8,%r8 je .L_after_reduction_goygycijAEpsvvt vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_goygycijAEpsvvt: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_15_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_vyklFkDwzsnvgsC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_vyklFkDwzsnvgsC .L_16_blocks_overflow_vyklFkDwzsnvgsC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_vyklFkDwzsnvgsC: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 
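// Remaining AES rounds: the .byte sequences here are EVEX-encoded vaesenc
// and vaesenclast on zmm registers, emitted as raw bytes so that older
// assemblers without these encodings can still build the file.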
vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wtfwhoaquntnsFC subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wtfwhoaquntnsFC .L_small_initial_partial_block_wtfwhoaquntnsFC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 
vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wtfwhoaquntnsFC: orq %r8,%r8 je .L_after_reduction_wtfwhoaquntnsFC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wtfwhoaquntnsFC: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_16_cuuhesezwjvjmyk: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_cwmmduuojwChbzc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_cwmmduuojwChbzc .L_16_blocks_overflow_cwmmduuojwChbzc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_cwmmduuojwChbzc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq 
%zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_EFFoGallwwbomEy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EFFoGallwwbomEy: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EFFoGallwwbomEy: jmp .L_last_blocks_done_cuuhesezwjvjmyk .L_last_num_blocks_is_0_cuuhesezwjvjmyk: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 
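// vpternlogq with immediate 0x96 is a three-input XOR (A ^ B ^ C), merging
// two GHASH partial products into the accumulator in a single instruction.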
vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_cuuhesezwjvjmyk: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_efvnrtvwAsfehEC .L_message_below_equal_16_blocks_efvnrtvwAsfehEC: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_sFoDGktxtpnDmhn jl .L_small_initial_num_blocks_is_7_1_sFoDGktxtpnDmhn cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_sFoDGktxtpnDmhn jl .L_small_initial_num_blocks_is_11_9_sFoDGktxtpnDmhn cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_sFoDGktxtpnDmhn cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_sFoDGktxtpnDmhn cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_sFoDGktxtpnDmhn jmp .L_small_initial_num_blocks_is_13_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_11_9_sFoDGktxtpnDmhn: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_sFoDGktxtpnDmhn cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_sFoDGktxtpnDmhn jmp .L_small_initial_num_blocks_is_9_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_7_1_sFoDGktxtpnDmhn: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_sFoDGktxtpnDmhn jl .L_small_initial_num_blocks_is_3_1_sFoDGktxtpnDmhn cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_sFoDGktxtpnDmhn cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_sFoDGktxtpnDmhn jmp .L_small_initial_num_blocks_is_5_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_3_1_sFoDGktxtpnDmhn: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_sFoDGktxtpnDmhn cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_1_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONEa(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%rax,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 
98,210,125,8,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,8,221,199 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_FGCgmvsGdutropz subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FGCgmvsGdutropz .L_small_initial_partial_block_FGCgmvsGdutropz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_FGCgmvsGdutropz .L_small_initial_compute_done_FGCgmvsGdutropz: .L_after_reduction_FGCgmvsGdutropz: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_2_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%rax,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,40,221,199 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jEBbtDDBfBjEltG subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 
98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jEBbtDDBfBjEltG .L_small_initial_partial_block_jEBbtDDBfBjEltG: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jEBbtDDBfBjEltG: orq %r8,%r8 je .L_after_reduction_jEBbtDDBfBjEltG vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jEBbtDDBfBjEltG: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_3_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EjibsatBlzkgqAl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EjibsatBlzkgqAl 
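// The .L_small_initial_partial_block_* path below appears to handle a
// message whose last block is short: it records the residual byte count at
// (%rdx), saves the last raw ciphertext block (%xmm12) at 16(%rsi), and
// GHASHes only the preceding full blocks, leaving the tail for a later call.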
.L_small_initial_partial_block_EjibsatBlzkgqAl: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EjibsatBlzkgqAl: orq %r8,%r8 je .L_after_reduction_EjibsatBlzkgqAl vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_EjibsatBlzkgqAl: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_4_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xqconsagugmDarn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xqconsagugmDarn .L_small_initial_partial_block_xqconsagugmDarn: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 .byte 
98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xqconsagugmDarn: orq %r8,%r8 je .L_after_reduction_xqconsagugmDarn vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_xqconsagugmDarn: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_5_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,8,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,8,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pbFCejpvpmxjAhk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pbFCejpvpmxjAhk .L_small_initial_partial_block_pbFCejpvpmxjAhk: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pbFCejpvpmxjAhk: orq %r8,%r8 je .L_after_reduction_pbFCejpvpmxjAhk vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_pbFCejpvpmxjAhk: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_6_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,40,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,40,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_uktkzFjovqcxfqp subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 
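// In the six-block path the first four ciphertext blocks were multiplied by
// what appear to be H^6..H^3 above; the %ymm load below seems to supply
// H^2 and H^1 for the last two blocks, with all partial products
// accumulated before a single polynomial reduction.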
vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_uktkzFjovqcxfqp .L_small_initial_partial_block_uktkzFjovqcxfqp: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 240(%r10),%xmm20 .byte 98,179,69,8,68,228,1 .byte 98,179,69,8,68,236,16 .byte 98,179,69,8,68,196,17 .byte 98,179,69,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_uktkzFjovqcxfqp: orq %r8,%r8 je .L_after_reduction_uktkzFjovqcxfqp vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_uktkzFjovqcxfqp: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_7_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 
98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_giliDfyAgzgDsqz subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_giliDfyAgzgDsqz .L_small_initial_partial_block_giliDfyAgzgDsqz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 224(%r10),%ymm20 .byte 98,179,69,40,68,228,1 .byte 98,179,69,40,68,236,16 .byte 98,179,69,40,68,196,17 .byte 98,179,69,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_giliDfyAgzgDsqz: orq %r8,%r8 je .L_after_reduction_giliDfyAgzgDsqz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_giliDfyAgzgDsqz: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_8_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 
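// The interleaved .byte runs below appear to be EVEX-encoded VAESENC over
// the two 4-block lanes (%zmm0, %zmm3), emitted as raw bytes. With 13 round
// keys at offsets 0..192 of (%rdi) this matches an AES-192 schedule. A rough
// C-intrinsics sketch of one lane (rk and ctrs are hypothetical names, not
// part of this file):
//   __m512i lane = _mm512_xor_si512(ctrs, _mm512_broadcast_i64x2(rk[0]));
//   for (int r = 1; r < 12; r++)
//     lane = _mm512_aesenc_epi128(lane, _mm512_broadcast_i64x2(rk[r]));
//   lane = _mm512_aesenclast_epi128(lane, _mm512_broadcast_i64x2(rk[12]));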
vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_DjnECqEweilEAGu subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_DjnECqEweilEAGu .L_small_initial_partial_block_DjnECqEweilEAGu: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%r10),%zmm20 .byte 98,51,77,72,68,252,17 .byte 98,163,77,72,68,196,0 .byte 98,163,77,72,68,204,1 .byte 98,163,77,72,68,220,16 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,69,72,68,228,1 .byte 98,179,69,72,68,236,16 .byte 98,179,69,72,68,196,17 .byte 98,179,69,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 
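// Tail of the reduction: the vpsrldq/vpslldq pair below aligns the two
// carry-less products on a 32-bit boundary, and the closing vpternlogq
// XORs them with the folded high half, leaving the updated 128-bit hash
// in %xmm14.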
vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_DjnECqEweilEAGu: orq %r8,%r8 je .L_after_reduction_DjnECqEweilEAGu vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_DjnECqEweilEAGu: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_9_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,8,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,8,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kgxaGkfnalAmrwz subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq 
$8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kgxaGkfnalAmrwz .L_small_initial_partial_block_kgxaGkfnalAmrwz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 192(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kgxaGkfnalAmrwz: orq %r8,%r8 je .L_after_reduction_kgxaGkfnalAmrwz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_kgxaGkfnalAmrwz: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_10_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 
98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,40,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,40,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BCvcswkitbgmjFe subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BCvcswkitbgmjFe .L_small_initial_partial_block_BCvcswkitbgmjFe: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 176(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,45,8,68,228,1 .byte 98,179,45,8,68,236,16 .byte 98,179,45,8,68,196,17 .byte 98,179,45,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
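// At each *_compute_done label the orq/je pair tests the remaining length:
// when bytes are left over, the byte-swapped final ciphertext block in
// %xmm13 is XORed into the running hash so a subsequent call can complete
// the GHASH of the partial block.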
.L_small_initial_compute_done_BCvcswkitbgmjFe: orq %r8,%r8 je .L_after_reduction_BCvcswkitbgmjFe vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_BCvcswkitbgmjFe: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_11_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xxiyEjrxujqtjjz subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq 
%zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xxiyEjrxujqtjjz .L_small_initial_partial_block_xxiyEjrxujqtjjz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 96(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 160(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,45,40,68,228,1 .byte 98,179,45,40,68,236,16 .byte 98,179,45,40,68,196,17 .byte 98,179,45,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xxiyEjrxujqtjjz: orq %r8,%r8 je .L_after_reduction_xxiyEjrxujqtjjz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_xxiyEjrxujqtjjz: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_12_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 
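// The twelve-block handler drives three 4-block counter lanes
// (%zmm0, %zmm3, %zmm4) through the same broadcast key schedule; the
// remaining middle rounds at offsets 128..176 follow, with only the third
// lane's loads and stores masked by %k1.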
vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_jAwfsnuhpsyacia subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_jAwfsnuhpsyacia .L_small_initial_partial_block_jAwfsnuhpsyacia: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 80(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 144(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,45,72,68,228,1 .byte 98,179,45,72,68,236,16 .byte 98,179,45,72,68,196,17 .byte 98,179,45,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
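// vpternlogq with immediate 0x96 computes the bitwise parity (three-way
// XOR) of its operands, merging two partial products and an accumulator in
// a single instruction throughout these GHASH tails.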
.byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_jAwfsnuhpsyacia: orq %r8,%r8 je .L_after_reduction_jAwfsnuhpsyacia vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_jAwfsnuhpsyacia: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_13_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,8,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,8,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iimAEdydkqcfzCi subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 
98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iimAEdydkqcfzCi .L_small_initial_partial_block_iimAEdydkqcfzCi: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 64(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 128(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 192(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iimAEdydkqcfzCi: orq %r8,%r8 je .L_after_reduction_iimAEdydkqcfzCi vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_iimAEdydkqcfzCi: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_14_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq 
%zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,40,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,40,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cflpryEedqzCjvl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 
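// Final reduction step: POLY2 appears to hold the folding constant for the
// GCM polynomial g(x) = x^128 + x^7 + x^2 + x + 1. Two carry-less multiply
// passes against it (selectors $0x01, then $0x00/$0x10) fold the high
// 128 bits of the product back into the low half, reducing the 256-bit
// result to the 128-bit hash state.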
.byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cflpryEedqzCjvl .L_small_initial_partial_block_cflpryEedqzCjvl: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 48(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 112(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 176(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 240(%r10),%xmm20 .byte 98,179,37,8,68,228,1 .byte 98,179,37,8,68,236,16 .byte 98,179,37,8,68,196,17 .byte 98,179,37,8,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cflpryEedqzCjvl: orq %r8,%r8 je .L_after_reduction_cflpryEedqzCjvl vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_cflpryEedqzCjvl: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_15_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 
98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pFbDADbCplEDFfw subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pFbDADbCplEDFfw .L_small_initial_partial_block_pFbDADbCplEDFfw: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 
vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pFbDADbCplEDFfw: orq %r8,%r8 je .L_after_reduction_pFbDADbCplEDFfw vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_pFbDADbCplEDFfw: jmp .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn .L_small_initial_num_blocks_is_16_sFoDGktxtpnDmhn: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 
vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,72,221,199 .byte 98,210,101,72,221,223 .byte 98,210,93,72,221,231 .byte 98,210,85,72,221,239 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_pGqEmoznEqGhujq: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 16(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 80(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 144(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 208(%r10),%ymm20 vinserti64x2 $2,240(%r10),%zmm20,%zmm20 .byte 98,179,37,72,68,228,1 .byte 98,179,37,72,68,236,16 .byte 98,179,37,72,68,196,17 .byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pGqEmoznEqGhujq: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_pGqEmoznEqGhujq: .L_small_initial_blocks_encrypted_sFoDGktxtpnDmhn: .L_ghash_done_efvnrtvwAsfehEC: vmovdqu64 %xmm2,0(%rsi) .L_enc_dec_done_efvnrtvwAsfehEC: vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_abort_efvnrtvwAsfehEC: jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_256_avx512: orq %r8,%r8 je .L_enc_dec_abort_keEetjmxflGqBfv xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 vpshufb SHUF_MASK(%rip),%xmm14,%xmm14 movl (%rdx),%eax orq %rax,%rax je .L_partial_block_done_fhsskwCeFatEtrh movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 leaq 80(%rsi),%r10 vmovdqu64 240(%r10),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %rax,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%rax,1),%r13 subq $16,%r13 jge .L_no_extra_mask_fhsskwCeFatEtrh subq %r13,%r12 .L_no_extra_mask_fhsskwCeFatEtrh: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_fhsskwCeFatEtrh .byte 98,243,13,8,68,252,17 .byte 98,115,13,8,68,212,0 .byte 
98,115,13,8,68,220,1 .byte 98,115,13,8,68,244,16 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 .byte 98,83,37,8,68,214,1 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 .byte 98,83,37,8,68,214,0 vpsrldq $4,%xmm10,%xmm10 .byte 98,83,37,8,68,246,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movl $0,(%rdx) movq %rax,%r12 movq $16,%rax subq %r12,%rax jmp .L_enc_dec_done_fhsskwCeFatEtrh .L_partial_incomplete_fhsskwCeFatEtrh: addl %r8d,(%rdx) movq %r8,%rax .L_enc_dec_done_fhsskwCeFatEtrh: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%rax,2),%k1 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_fhsskwCeFatEtrh: vmovdqu64 0(%rsi),%xmm2 subq %rax,%r8 je .L_enc_dec_done_keEetjmxflGqBfv cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_keEetjmxflGqBfv vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_tpefFeFucnbumCh vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_tpefFeFucnbumCh .L_next_16_overflow_tpefFeFucnbumCh: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_tpefFeFucnbumCh: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%rax,1),%zmm0 vmovdqu8 64(%rcx,%rax,1),%zmm3 vmovdqu8 128(%rcx,%rax,1),%zmm4 vmovdqu8 192(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 176(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 192(%rdi),%zmm6 
.byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 208(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 224(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%rax,1) vmovdqu8 %zmm10,64(%r10,%rax,1) vmovdqu8 %zmm11,128(%r10,%rax,1) vmovdqu8 %zmm12,192(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_iDAhziwpjqoADaj vmovdqu64 192(%r12),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 128(%r12),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 64(%r12),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 0(%r12),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_iDAhziwpjqoADaj: cmpq $512,%r8 jb .L_message_below_32_blocks_keEetjmxflGqBfv cmpb $240,%r15b jae .L_next_16_overflow_qgCCeDpdgxsjtxo vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_qgCCeDpdgxsjtxo .L_next_16_overflow_qgCCeDpdgxsjtxo: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_qgCCeDpdgxsjtxo: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%rax,1),%zmm0 vmovdqu8 320(%rcx,%rax,1),%zmm3 vmovdqu8 384(%rcx,%rax,1),%zmm4 vmovdqu8 448(%rcx,%rax,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 32(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 48(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 64(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 80(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 96(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 112(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 128(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 144(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 160(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 
98,114,29,72,220,230 vbroadcastf64x2 176(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 192(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 208(%rdi),%zmm6 .byte 98,242,69,72,220,254 .byte 98,114,45,72,220,214 .byte 98,114,37,72,220,222 .byte 98,114,29,72,220,230 vbroadcastf64x2 224(%rdi),%zmm6 .byte 98,242,69,72,221,254 .byte 98,114,45,72,221,214 .byte 98,114,37,72,221,222 .byte 98,114,29,72,221,230 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%rax,1) vmovdqu8 %zmm10,320(%r10,%rax,1) vmovdqu8 %zmm11,384(%r10,%rax,1) vmovdqu8 %zmm12,448(%r10,%rax,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_ErxbfranEhsBGhe vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 
vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_ErxbfranEhsBGhe: movq $1,%r14 addq $512,%rax subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_keEetjmxflGqBfv .L_encrypt_big_nblocks_keEetjmxflGqBfv: cmpb $240,%r15b jae .L_16_blocks_overflow_budzEysnblsjtjq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_budzEysnblsjtjq .L_16_blocks_overflow_budzEysnblsjtjq: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_budzEysnblsjtjq: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 
.byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_ojhGelucjaDDiwh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_ojhGelucjaDDiwh .L_16_blocks_overflow_ojhGelucjaDDiwh: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_ojhGelucjaDDiwh: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 
98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_jpAfkEctagbyfkB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp 
.L_16_blocks_ok_jpAfkEctagbyfkB .L_16_blocks_overflow_jpAfkEctagbyfkB: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_jpAfkEctagbyfkB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%rax,1),%zmm17 vmovdqu8 576(%rcx,%rax,1),%zmm19 vmovdqu8 640(%rcx,%rax,1),%zmm20 vmovdqu8 704(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 
98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%rax,1) vmovdqu8 %zmm3,576(%r10,%rax,1) vmovdqu8 %zmm4,640(%r10,%rax,1) vmovdqu8 %zmm5,704(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%rax subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_keEetjmxflGqBfv .L_no_more_big_nblocks_keEetjmxflGqBfv: cmpq $512,%r8 jae .L_encrypt_32_blocks_keEetjmxflGqBfv cmpq $256,%r8 jae .L_encrypt_16_blocks_keEetjmxflGqBfv .L_encrypt_0_blocks_ghash_32_keEetjmxflGqBfv: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_CjljsCjaoxujvDg cmpl $8,%r10d je .L_last_num_blocks_is_8_CjljsCjaoxujvDg jb .L_last_num_blocks_is_7_1_CjljsCjaoxujvDg cmpl $12,%r10d je .L_last_num_blocks_is_12_CjljsCjaoxujvDg jb .L_last_num_blocks_is_11_9_CjljsCjaoxujvDg cmpl $15,%r10d je .L_last_num_blocks_is_15_CjljsCjaoxujvDg ja .L_last_num_blocks_is_16_CjljsCjaoxujvDg cmpl $14,%r10d je .L_last_num_blocks_is_14_CjljsCjaoxujvDg jmp .L_last_num_blocks_is_13_CjljsCjaoxujvDg .L_last_num_blocks_is_11_9_CjljsCjaoxujvDg: cmpl $10,%r10d je .L_last_num_blocks_is_10_CjljsCjaoxujvDg ja .L_last_num_blocks_is_11_CjljsCjaoxujvDg jmp .L_last_num_blocks_is_9_CjljsCjaoxujvDg .L_last_num_blocks_is_7_1_CjljsCjaoxujvDg: cmpl $4,%r10d je .L_last_num_blocks_is_4_CjljsCjaoxujvDg jb .L_last_num_blocks_is_3_1_CjljsCjaoxujvDg cmpl $6,%r10d ja .L_last_num_blocks_is_7_CjljsCjaoxujvDg je .L_last_num_blocks_is_6_CjljsCjaoxujvDg jmp .L_last_num_blocks_is_5_CjljsCjaoxujvDg 
.L_last_num_blocks_is_3_1_CjljsCjaoxujvDg: cmpl $2,%r10d ja .L_last_num_blocks_is_3_CjljsCjaoxujvDg je .L_last_num_blocks_is_2_CjljsCjaoxujvDg .L_last_num_blocks_is_1_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_BpzosFahboxovuF vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_BpzosFahboxovuF .L_16_blocks_overflow_BpzosFahboxovuF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_BpzosFahboxovuF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_dzmevElEtmlqdvB subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq 
$4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dzmevElEtmlqdvB .L_small_initial_partial_block_dzmevElEtmlqdvB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_dzmevElEtmlqdvB .L_small_initial_compute_done_dzmevElEtmlqdvB: .L_after_reduction_dzmevElEtmlqdvB: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_2_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_idijgbEnolbjmvb vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_idijgbEnolbjmvb .L_16_blocks_overflow_idijgbEnolbjmvb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_idijgbEnolbjmvb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} 
vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zvohpFFyvnbybFD subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zvohpFFyvnbybFD .L_small_initial_partial_block_zvohpFFyvnbybFD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zvohpFFyvnbybFD: orq %r8,%r8 je .L_after_reduction_zvohpFFyvnbybFD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zvohpFFyvnbybFD: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_3_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_wghnihbAoEsnemr vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_wghnihbAoEsnemr .L_16_blocks_overflow_wghnihbAoEsnemr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_wghnihbAoEsnemr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 
vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_pjzlkCCsFsjiBsp subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pjzlkCCsFsjiBsp .L_small_initial_partial_block_pjzlkCCsFsjiBsp: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pjzlkCCsFsjiBsp: orq %r8,%r8 je .L_after_reduction_pjzlkCCsFsjiBsp vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pjzlkCCsFsjiBsp: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_4_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_CjzGmeouGBagvfs vpaddd 
%zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_CjzGmeouGBagvfs .L_16_blocks_overflow_CjzGmeouGBagvfs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_CjzGmeouGBagvfs: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_emFtlfDdrDiyoGj subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_emFtlfDdrDiyoGj .L_small_initial_partial_block_emFtlfDdrDiyoGj: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 
$2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_emFtlfDdrDiyoGj: orq %r8,%r8 je .L_after_reduction_emFtlfDdrDiyoGj vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_emFtlfDdrDiyoGj: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_5_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_DgBblneEbhavoAc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_DgBblneEbhavoAc .L_16_blocks_overflow_DgBblneEbhavoAc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_DgBblneEbhavoAc: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 
98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vowzfgidatEfBqr subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vowzfgidatEfBqr .L_small_initial_partial_block_vowzfgidatEfBqr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vowzfgidatEfBqr: orq %r8,%r8 je .L_after_reduction_vowzfgidatEfBqr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vowzfgidatEfBqr: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_6_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_sswuqofDefGijpp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_sswuqofDefGijpp .L_16_blocks_overflow_sswuqofDefGijpp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_sswuqofDefGijpp: 
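// Editorial note (hand-added comment; file is otherwise generator output):
// 6-block tail path. Four counter blocks travel in %zmm0 and two in %ymm3;
// the final partial load/store uses the %k1 byte mask computed above from
// byte64_len_to_mask_table.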
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zaEschzpbmFozoB subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 
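// The surrounding vextracti64x4/vextracti32x4 + vpxorq ladder XOR-folds the
// 512-bit GHASH accumulators lane by lane down to a single 128-bit value
// ahead of the polynomial reduction.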
vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zaEschzpbmFozoB .L_small_initial_partial_block_zaEschzpbmFozoB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zaEschzpbmFozoB: orq %r8,%r8 je .L_after_reduction_zaEschzpbmFozoB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zaEschzpbmFozoB: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_7_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_lncoDbxzFvwogbC vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_lncoDbxzFvwogbC .L_16_blocks_overflow_lncoDbxzFvwogbC: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_lncoDbxzFvwogbC: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 
64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_mbqrkjfyrCjFtkC subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mbqrkjfyrCjFtkC .L_small_initial_partial_block_mbqrkjfyrCjFtkC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 
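// Final reduction modulo the GCM polynomial: the EVEX .byte sequences here
// decode to vpclmulqdq (opcode 0x44 in the 0f3a map) multiplies against the
// POLY2 constant, combined with vpternlogq $0x96 (a three-way XOR) to land
// the reduced hash state in %xmm14.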
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mbqrkjfyrCjFtkC: orq %r8,%r8 je .L_after_reduction_mbqrkjfyrCjFtkC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_mbqrkjfyrCjFtkC: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_8_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_ExCdtGvwDseyezE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ExCdtGvwDseyezE .L_16_blocks_overflow_ExCdtGvwDseyezE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ExCdtGvwDseyezE: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 
%zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ajbflDwBgvpaEcE subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ajbflDwBgvpaEcE .L_small_initial_partial_block_ajbflDwBgvpaEcE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ajbflDwBgvpaEcE: orq %r8,%r8 je .L_after_reduction_ajbflDwBgvpaEcE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ajbflDwBgvpaEcE: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_9_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_qnvdfsmvntyhGuo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_qnvdfsmvntyhGuo .L_16_blocks_overflow_qnvdfsmvntyhGuo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_qnvdfsmvntyhGuo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 
1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_kduEkAyqanCoGvE subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 
240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_kduEkAyqanCoGvE .L_small_initial_partial_block_kduEkAyqanCoGvE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_kduEkAyqanCoGvE: orq %r8,%r8 je .L_after_reduction_kduEkAyqanCoGvE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_kduEkAyqanCoGvE: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_10_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_ucjmDCDgtvwsblB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_ucjmDCDgtvwsblB .L_16_blocks_overflow_ucjmDCDgtvwsblB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_ucjmDCDgtvwsblB: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 
98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_grEzijkmcwkEkrv subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_grEzijkmcwkEkrv .L_small_initial_partial_block_grEzijkmcwkEkrv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_grEzijkmcwkEkrv: orq %r8,%r8 je .L_after_reduction_grEzijkmcwkEkrv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_grEzijkmcwkEkrv: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_11_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_tGfszqdtairfiAy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_tGfszqdtairfiAy .L_16_blocks_overflow_tGfszqdtairfiAy: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_tGfszqdtairfiAy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 
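// The .byte runs with opcode 0xdc/0xdd (0f38 map) decode to EVEX
// vaesenc/vaesenclast rounds, emitted as raw bytes by the generator,
// presumably for toolchain compatibility; each vbroadcastf64x2 N(%rdi)
// replicates the next 128-bit round key across all lanes before the round
// is applied to the counter-block vectors.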
vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_nyiEcniDhxadvrv subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_nyiEcniDhxadvrv .L_small_initial_partial_block_nyiEcniDhxadvrv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_nyiEcniDhxadvrv: orq %r8,%r8 je .L_after_reduction_nyiEcniDhxadvrv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_nyiEcniDhxadvrv: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_12_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_GBxxxGGdrBGGAzv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_GBxxxGGdrBGGAzv .L_16_blocks_overflow_GBxxxGGdrBGGAzv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_GBxxxGGdrBGGAzv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 
98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vhoepgywGpbErsu subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_vhoepgywGpbErsu .L_small_initial_partial_block_vhoepgywGpbErsu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vhoepgywGpbErsu: orq %r8,%r8 je .L_after_reduction_vhoepgywGpbErsu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vhoepgywGpbErsu: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_13_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_rFdlFzmcbwfmCFo vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_rFdlFzmcbwfmCFo .L_16_blocks_overflow_rFdlFzmcbwfmCFo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_rFdlFzmcbwfmCFo: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tidvwDCqozzjufl subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 
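// Recombine the carry-less products: vpsrldq/vpslldq split the cross-term
// sum in %zmm4 and XOR its halves into the high and low accumulators before
// the lane fold and polynomial reduction that follow.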
vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tidvwDCqozzjufl .L_small_initial_partial_block_tidvwDCqozzjufl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tidvwDCqozzjufl: orq %r8,%r8 je .L_after_reduction_tidvwDCqozzjufl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tidvwDCqozzjufl: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_14_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_yDllfugovhaluis vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_yDllfugovhaluis .L_16_blocks_overflow_yDllfugovhaluis: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_yDllfugovhaluis: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 
98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_vGAylAjswesdfcA subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 
98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_vGAylAjswesdfcA .L_small_initial_partial_block_vGAylAjswesdfcA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_vGAylAjswesdfcA: orq %r8,%r8 je .L_after_reduction_vGAylAjswesdfcA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_vGAylAjswesdfcA: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_15_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_pincAkEEiiwgxGh vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_pincAkEEiiwgxGh .L_16_blocks_overflow_pincAkEEiiwgxGh: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 
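/* Tail path for 15 remaining blocks.  The overflow branch above
   byte-swaps the counter in %zmm2 (vpshufb %zmm29), adds the
   ddq_add_1234/ddq_add_4444 constants, and swaps back, leaving the
   final counter blocks in %zmm0,%zmm3,%zmm4,%zmm5.  Below, the AES
   rounds are interleaved with GHASH of the previous blocks; the .byte
   runs are EVEX-encoded vaesenc/vaesenclast (opcodes 0xdc/0xdd) and
   vpclmulqdq (0x44) on zmm registers, emitted as raw bytes by the
   generator, presumably for assembler compatibility. */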
.L_16_blocks_ok_pincAkEEiiwgxGh: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 
%zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FgsluzdCoDzfqdG subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FgsluzdCoDzfqdG .L_small_initial_partial_block_FgsluzdCoDzfqdG: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FgsluzdCoDzfqdG: orq %r8,%r8 je 
.L_after_reduction_FgsluzdCoDzfqdG vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FgsluzdCoDzfqdG: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_16_CjljsCjaoxujvDg: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_dBDAoEoFjhwcanb vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_dBDAoEoFjhwcanb .L_16_blocks_overflow_dBDAoEoFjhwcanb: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_dBDAoEoFjhwcanb: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_fdoxuvdoEsDrnFi: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fdoxuvdoEsDrnFi: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_fdoxuvdoEsDrnFi: jmp .L_last_blocks_done_CjljsCjaoxujvDg .L_last_num_blocks_is_0_CjljsCjaoxujvDg: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 
1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_CjljsCjaoxujvDg: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_keEetjmxflGqBfv .L_encrypt_32_blocks_keEetjmxflGqBfv: cmpb $240,%r15b jae .L_16_blocks_overflow_wovDjxgtezsaCbn vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wovDjxgtezsaCbn .L_16_blocks_overflow_wovDjxgtezsaCbn: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wovDjxgtezsaCbn: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_qraoeizxDFojkGy vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_qraoeizxDFojkGy .L_16_blocks_overflow_qraoeizxDFojkGy: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_qraoeizxDFojkGy: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%rax,1),%zmm17 vmovdqu8 320(%rcx,%rax,1),%zmm19 vmovdqu8 384(%rcx,%rax,1),%zmm20 vmovdqu8 448(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%rax,1) vmovdqu8 %zmm3,320(%r10,%rax,1) vmovdqu8 %zmm4,384(%r10,%rax,1) vmovdqu8 %zmm5,448(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq 
%zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%rax movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_CafpBFgwEozfiCz cmpl $8,%r10d je .L_last_num_blocks_is_8_CafpBFgwEozfiCz jb .L_last_num_blocks_is_7_1_CafpBFgwEozfiCz cmpl $12,%r10d je .L_last_num_blocks_is_12_CafpBFgwEozfiCz jb .L_last_num_blocks_is_11_9_CafpBFgwEozfiCz cmpl $15,%r10d je .L_last_num_blocks_is_15_CafpBFgwEozfiCz ja .L_last_num_blocks_is_16_CafpBFgwEozfiCz cmpl $14,%r10d je .L_last_num_blocks_is_14_CafpBFgwEozfiCz jmp .L_last_num_blocks_is_13_CafpBFgwEozfiCz .L_last_num_blocks_is_11_9_CafpBFgwEozfiCz: cmpl $10,%r10d je .L_last_num_blocks_is_10_CafpBFgwEozfiCz ja .L_last_num_blocks_is_11_CafpBFgwEozfiCz jmp .L_last_num_blocks_is_9_CafpBFgwEozfiCz .L_last_num_blocks_is_7_1_CafpBFgwEozfiCz: cmpl $4,%r10d je .L_last_num_blocks_is_4_CafpBFgwEozfiCz jb .L_last_num_blocks_is_3_1_CafpBFgwEozfiCz cmpl $6,%r10d ja .L_last_num_blocks_is_7_CafpBFgwEozfiCz je .L_last_num_blocks_is_6_CafpBFgwEozfiCz jmp .L_last_num_blocks_is_5_CafpBFgwEozfiCz .L_last_num_blocks_is_3_1_CafpBFgwEozfiCz: cmpl $2,%r10d ja .L_last_num_blocks_is_3_CafpBFgwEozfiCz je .L_last_num_blocks_is_2_CafpBFgwEozfiCz .L_last_num_blocks_is_1_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_fekfutzigacvqDc vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_fekfutzigacvqDc .L_16_blocks_overflow_fekfutzigacvqDc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_fekfutzigacvqDc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_BqCzdBwrfgovfqg subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BqCzdBwrfgovfqg .L_small_initial_partial_block_BqCzdBwrfgovfqg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_BqCzdBwrfgovfqg .L_small_initial_compute_done_BqCzdBwrfgovfqg: .L_after_reduction_BqCzdBwrfgovfqg: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_2_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_zEwleqntmDxAeyd vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_zEwleqntmDxAeyd .L_16_blocks_overflow_zEwleqntmDxAeyd: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_zEwleqntmDxAeyd: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 
98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_eohifjbpuerrzyg subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_eohifjbpuerrzyg .L_small_initial_partial_block_eohifjbpuerrzyg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_eohifjbpuerrzyg: orq %r8,%r8 je 
.L_after_reduction_eohifjbpuerrzyg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_eohifjbpuerrzyg: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_3_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_acnffEtijrEjnxo vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_acnffEtijrEjnxo .L_16_blocks_overflow_acnffEtijrEjnxo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_acnffEtijrEjnxo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_doyzohBGtCkjnqc subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_doyzohBGtCkjnqc
.L_small_initial_partial_block_doyzohBGtCkjnqc:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 224(%r10),%ymm1
.byte 98,243,117,32,68,225,1
.byte 98,243,117,32,68,233,16
.byte 98,243,117,32,68,193,17
.byte 98,243,117,32,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_doyzohBGtCkjnqc:
orq %r8,%r8
je .L_after_reduction_doyzohBGtCkjnqc
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_doyzohBGtCkjnqc:
jmp .L_last_blocks_done_CafpBFgwEozfiCz
.L_last_num_blocks_is_4_CafpBFgwEozfiCz:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
kmovq (%r10,%r11,8),%k1
cmpl $252,%r15d
jae .L_16_blocks_overflow_uGhvhwlitDofjoE
vpaddd %zmm28,%zmm2,%zmm0
jmp .L_16_blocks_ok_uGhvhwlitDofjoE
.L_16_blocks_overflow_uGhvhwlitDofjoE:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpshufb %zmm29,%zmm0,%zmm0
.L_16_blocks_ok_uGhvhwlitDofjoE:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $3,%zmm0,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
vmovdqu64 192(%rsp,%rbx,1),%zmm18
vmovdqa64 960(%rsp),%zmm22
.byte 98,146,125,72,220,198
vbroadcastf64x2 64(%rdi),%zmm30
.byte 98,227,61,72,68,225,16
.byte 98,227,61,72,68,233,1
.byte 98,227,61,72,68,201,17
.byte 98,227,61,72,68,217,0
.byte 98,146,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm31
vpternlogq $0x96,%zmm17,%zmm12,%zmm14
vpternlogq $0x96,%zmm19,%zmm13,%zmm7
vpternlogq $0x96,%zmm21,%zmm16,%zmm11
vpternlogq $0x96,%zmm20,%zmm15,%zmm10
.byte 98,146,125,72,220,198
vbroadcastf64x2 96(%rdi),%zmm30
vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z}
.byte 98,146,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm31
.byte 98,51,77,64,68,250,16
.byte 98,163,77,64,68,194,1
.byte 98,51,77,64,68,226,17
.byte 98,51,77,64,68,234,0
.byte 98,146,125,72,220,198
vbroadcastf64x2 128(%rdi),%zmm30
vpternlogq $0x96,%zmm16,%zmm11,%zmm10
vpxorq %zmm12,%zmm14,%zmm24
vpxorq %zmm13,%zmm7,%zmm25
vpxorq %zmm15,%zmm10,%zmm26
.byte 98,146,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 160(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm31
.byte 98,146,125,72,220,198
vbroadcastf64x2 192(%rdi),%zmm30
.byte 98,146,125,72,220,199
vbroadcastf64x2 208(%rdi),%zmm31
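/* Last AES round for the 4-block tail: the final round key at
   224(%rdi) is broadcast and vaesenc/vaesenclast (.byte runs ending
   in opcode 220/221) complete the keystream in %zmm0 before it is
   XORed with the masked input loaded through %k1. */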
.byte 98,146,125,72,220,198
vbroadcastf64x2 224(%rdi),%zmm30
.byte 98,146,125,72,220,199
.byte 98,146,125,72,221,198
vpxorq %zmm17,%zmm0,%zmm0
vextracti32x4 $3,%zmm0,%xmm11
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm17,%zmm17{%k1}{z}
vpshufb %zmm29,%zmm17,%zmm17
vextracti32x4 $3,%zmm17,%xmm7
leaq 80(%rsi),%r10
subq $16 * (4 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_CsCwmBEowahhzih
subq $16,%r8
movl $0,(%rdx)
vmovdqu64 192(%r10),%zmm1
.byte 98,115,117,64,68,193,17
.byte 98,227,117,64,68,241,0
.byte 98,99,117,64,68,241,1
.byte 98,99,117,64,68,249,16
vpxorq %zmm26,%zmm30,%zmm30
vpxorq %zmm24,%zmm8,%zmm8
vpxorq %zmm25,%zmm22,%zmm22
vpxorq %zmm31,%zmm30,%zmm30
vpsrldq $8,%zmm30,%zmm4
vpslldq $8,%zmm30,%zmm5
vpxorq %zmm4,%zmm8,%zmm0
vpxorq %zmm5,%zmm22,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_CsCwmBEowahhzih
.L_small_initial_partial_block_CsCwmBEowahhzih:
movl %r8d,(%rdx)
vmovdqu64 %xmm11,16(%rsi)
vmovdqu64 208(%r10),%ymm1
vinserti64x2 $2,240(%r10),%zmm1,%zmm1
.byte 98,243,117,64,68,225,1
.byte 98,243,117,64,68,233,16
.byte 98,243,117,64,68,193,17
.byte 98,243,117,64,68,217,0
vpxorq %zmm26,%zmm4,%zmm4
vpxorq %zmm24,%zmm0,%zmm0
vpxorq %zmm25,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm30
vpslldq $8,%zmm4,%zmm31
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %zmm31,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm30
vpxorq %ymm30,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm30
vpxorq %xmm30,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm31
vpxorq %ymm31,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm31
vpxorq %xmm31,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm1
.byte 98,243,117,8,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,117,8,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,117,8,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_CsCwmBEowahhzih:
orq %r8,%r8
je .L_after_reduction_CsCwmBEowahhzih
vpxorq %xmm7,%xmm14,%xmm14
.L_after_reduction_CsCwmBEowahhzih:
jmp .L_last_blocks_done_CafpBFgwEozfiCz
.L_last_num_blocks_is_5_CafpBFgwEozfiCz:
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r11
subq $64,%r11
kmovq (%r10,%r11,8),%k1
cmpl $251,%r15d
jae .L_16_blocks_overflow_BwnlahcxoBDAelu
vpaddd %zmm28,%zmm2,%zmm0
vpaddd %xmm27,%xmm0,%xmm3
jmp .L_16_blocks_ok_BwnlahcxoBDAelu
.L_16_blocks_overflow_BwnlahcxoBDAelu:
vpshufb %zmm29,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vmovdqa64 ddq_add_4444(%rip),%zmm5
vpaddd %zmm5,%zmm0,%zmm3
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %xmm29,%xmm3,%xmm3
.L_16_blocks_ok_BwnlahcxoBDAelu:
vbroadcastf64x2 0(%rdi),%zmm30
vpxorq 768(%rsp),%zmm14,%zmm8
vmovdqu64 0(%rsp,%rbx,1),%zmm1
vextracti32x4 $0,%zmm3,%xmm2
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vbroadcastf64x2 16(%rdi),%zmm31
vmovdqu64 64(%rsp,%rbx,1),%zmm18
vmovdqa64 832(%rsp),%zmm22
vpxorq %zmm30,%zmm0,%zmm0
vpxorq %xmm30,%xmm3,%xmm3
vbroadcastf64x2 32(%rdi),%zmm30
.byte 98,115,61,72,68,241,17
.byte 98,243,61,72,68,249,0
.byte 98,115,61,72,68,209,1
.byte 98,115,61,72,68,217,16
vmovdqu64 128(%rsp,%rbx,1),%zmm1
vmovdqa64 896(%rsp),%zmm8
.byte 98,146,125,72,220,199
.byte 98,146,101,8,220,223
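/* 5-block tail: four counter blocks travel in %zmm0 and the fifth in
   %xmm3, so each round appears to pair a 512-bit vaesenc (.byte runs
   ending ...,220,19x) with a 128-bit one for %xmm3, while the
   interleaved vpclmulqdq runs continue folding the previous group's
   GHASH. */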
vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rBivbBgEnqzuoau subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rBivbBgEnqzuoau .L_small_initial_partial_block_rBivbBgEnqzuoau: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 
98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rBivbBgEnqzuoau: orq %r8,%r8 je .L_after_reduction_rBivbBgEnqzuoau vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rBivbBgEnqzuoau: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_6_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_ymfljrqweowoCvG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_ymfljrqweowoCvG .L_16_blocks_overflow_ymfljrqweowoCvG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_ymfljrqweowoCvG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 
192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Dlbqsuajgnhvlny subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Dlbqsuajgnhvlny .L_small_initial_partial_block_Dlbqsuajgnhvlny: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Dlbqsuajgnhvlny: orq %r8,%r8 je .L_after_reduction_Dlbqsuajgnhvlny vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Dlbqsuajgnhvlny: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_7_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_ijxrtlxzmzgCbiE vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_ijxrtlxzmzgCbiE .L_16_blocks_overflow_ijxrtlxzmzgCbiE: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_ijxrtlxzmzgCbiE: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Fpgnkfiyboaddsm subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Fpgnkfiyboaddsm .L_small_initial_partial_block_Fpgnkfiyboaddsm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Fpgnkfiyboaddsm: orq %r8,%r8 je .L_after_reduction_Fpgnkfiyboaddsm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Fpgnkfiyboaddsm: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_8_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_zzfxscwhyoakGqc vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_zzfxscwhyoakGqc .L_16_blocks_overflow_zzfxscwhyoakGqc: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_zzfxscwhyoakGqc: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xBdugFFrnyriCBE subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xBdugFFrnyriCBE .L_small_initial_partial_block_xBdugFFrnyriCBE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xBdugFFrnyriCBE: orq %r8,%r8 je .L_after_reduction_xBdugFFrnyriCBE vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xBdugFFrnyriCBE: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_9_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_hswtkcnEneBfnil vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_hswtkcnEneBfnil .L_16_blocks_overflow_hswtkcnEneBfnil: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_hswtkcnEneBfnil: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 
vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_lgbbvgiAttomlsy subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_lgbbvgiAttomlsy .L_small_initial_partial_block_lgbbvgiAttomlsy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_lgbbvgiAttomlsy: orq %r8,%r8 je .L_after_reduction_lgbbvgiAttomlsy vpxorq %xmm7,%xmm14,%xmm14 
.L_after_reduction_lgbbvgiAttomlsy: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_10_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_EBzDixsnrGlAsGi vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_EBzDixsnrGlAsGi .L_16_blocks_overflow_EBzDixsnrGlAsGi: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_EBzDixsnrGlAsGi: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xdsDxBzahxmzysb subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xdsDxBzahxmzysb .L_small_initial_partial_block_xdsDxBzahxmzysb: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xdsDxBzahxmzysb: orq %r8,%r8 je .L_after_reduction_xdsDxBzahxmzysb vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xdsDxBzahxmzysb: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_11_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_qyEwjvzrfEfrwlG 
vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_qyEwjvzrfEfrwlG .L_16_blocks_overflow_qyEwjvzrfEfrwlG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_qyEwjvzrfEfrwlG: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 
vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_cmfyhuncjqoAhuh subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_cmfyhuncjqoAhuh .L_small_initial_partial_block_cmfyhuncjqoAhuh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cmfyhuncjqoAhuh: orq %r8,%r8 je .L_after_reduction_cmfyhuncjqoAhuh vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cmfyhuncjqoAhuh: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_12_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_bcstjouersAefmz vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_bcstjouersAefmz .L_16_blocks_overflow_bcstjouersAefmz: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 
ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_bcstjouersAefmz: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iFhieggherswFAm subq $16,%r8 movl $0,(%rdx) 
vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iFhieggherswFAm .L_small_initial_partial_block_iFhieggherswFAm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iFhieggherswFAm: orq %r8,%r8 je .L_after_reduction_iFhieggherswFAm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iFhieggherswFAm: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_13_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_rymwDrficveEDaj vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_rymwDrficveEDaj .L_16_blocks_overflow_rymwDrficveEDaj: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 
.L_16_blocks_ok_rymwDrficveEDaj: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) 
vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aevCxqqBBnzfjmB subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aevCxqqBBnzfjmB .L_small_initial_partial_block_aevCxqqBBnzfjmB: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aevCxqqBBnzfjmB: orq %r8,%r8 je .L_after_reduction_aevCxqqBBnzfjmB vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aevCxqqBBnzfjmB: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_14_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae 
.L_16_blocks_overflow_kzfnwbigglfewrl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_kzfnwbigglfewrl .L_16_blocks_overflow_kzfnwbigglfewrl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_kzfnwbigglfewrl: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 
98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_BirmupqDcbxwtda subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_BirmupqDcbxwtda .L_small_initial_partial_block_BirmupqDcbxwtda: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_BirmupqDcbxwtda: orq %r8,%r8 je .L_after_reduction_BirmupqDcbxwtda vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_BirmupqDcbxwtda: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_15_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_zpEbDAveGDqklle vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_zpEbDAveGDqklle .L_16_blocks_overflow_zpEbDAveGDqklle: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_zpEbDAveGDqklle: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 
98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_Djympovkdexblck subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_Djympovkdexblck .L_small_initial_partial_block_Djympovkdexblck: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 
98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_Djympovkdexblck: orq %r8,%r8 je .L_after_reduction_Djympovkdexblck vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_Djympovkdexblck: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_16_CafpBFgwEozfiCz: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_bjFGibBdktCEryt vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_bjFGibBdktCEryt .L_16_blocks_overflow_bjFGibBdktCEryt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_bjFGibBdktCEryt: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 
0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_pBmCpEpokBigCud: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 
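// Final reduction for the 16-block tail: the 512-bit GHASH accumulator has
// been folded down to 128 bits above, and the carry-less multiplies against
// POLY2 below reduce it modulo the GCM polynomial. The .byte sequences of
// the form 98,...,68,... encode EVEX vpclmulqdq instructions, emitted as raw
// bytes so that assemblers without AVX-512 support can still build this file.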
vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_pBmCpEpokBigCud: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_pBmCpEpokBigCud: jmp .L_last_blocks_done_CafpBFgwEozfiCz .L_last_num_blocks_is_0_CafpBFgwEozfiCz: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_CafpBFgwEozfiCz: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_keEetjmxflGqBfv .L_encrypt_16_blocks_keEetjmxflGqBfv: cmpb $240,%r15b jae .L_16_blocks_overflow_wmtckzeadccoCgk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_wmtckzeadccoCgk .L_16_blocks_overflow_wmtckzeadccoCgk: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_wmtckzeadccoCgk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,243,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 .byte 
98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%rax movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_adnhFzpjBkEnjlu cmpl $8,%r10d je .L_last_num_blocks_is_8_adnhFzpjBkEnjlu jb .L_last_num_blocks_is_7_1_adnhFzpjBkEnjlu cmpl $12,%r10d je .L_last_num_blocks_is_12_adnhFzpjBkEnjlu jb .L_last_num_blocks_is_11_9_adnhFzpjBkEnjlu cmpl $15,%r10d je .L_last_num_blocks_is_15_adnhFzpjBkEnjlu ja .L_last_num_blocks_is_16_adnhFzpjBkEnjlu cmpl $14,%r10d je .L_last_num_blocks_is_14_adnhFzpjBkEnjlu jmp .L_last_num_blocks_is_13_adnhFzpjBkEnjlu .L_last_num_blocks_is_11_9_adnhFzpjBkEnjlu: cmpl $10,%r10d je .L_last_num_blocks_is_10_adnhFzpjBkEnjlu ja .L_last_num_blocks_is_11_adnhFzpjBkEnjlu jmp .L_last_num_blocks_is_9_adnhFzpjBkEnjlu .L_last_num_blocks_is_7_1_adnhFzpjBkEnjlu: cmpl $4,%r10d je .L_last_num_blocks_is_4_adnhFzpjBkEnjlu jb .L_last_num_blocks_is_3_1_adnhFzpjBkEnjlu cmpl $6,%r10d ja .L_last_num_blocks_is_7_adnhFzpjBkEnjlu je .L_last_num_blocks_is_6_adnhFzpjBkEnjlu jmp .L_last_num_blocks_is_5_adnhFzpjBkEnjlu .L_last_num_blocks_is_3_1_adnhFzpjBkEnjlu: cmpl $2,%r10d ja .L_last_num_blocks_is_3_adnhFzpjBkEnjlu je .L_last_num_blocks_is_2_adnhFzpjBkEnjlu .L_last_num_blocks_is_1_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_GsxmuksbpmpGjAF vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_GsxmuksbpmpGjAF .L_16_blocks_overflow_GsxmuksbpmpGjAF: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_GsxmuksbpmpGjAF: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 
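// Single-block tail: the .byte sequences 98,...,220,... and 98,...,221,...
// encode EVEX vaesenc and vaesenclast. The counter block is run through the
// AES rounds (round keys broadcast from the key schedule at (%rdi)) while the
// GHASH multiplies over the previous 16 blocks proceed in parallel.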
vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,8,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_fkgElsvknyCFraE subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fkgElsvknyCFraE .L_small_initial_partial_block_fkgElsvknyCFraE: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_fkgElsvknyCFraE .L_small_initial_compute_done_fkgElsvknyCFraE: .L_after_reduction_fkgElsvknyCFraE: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_2_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_EjdqvCnEusieimt vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_EjdqvCnEusieimt .L_16_blocks_overflow_EjdqvCnEusieimt: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_EjdqvCnEusieimt: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,40,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_rouAoqaCpdDxjzF subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_rouAoqaCpdDxjzF .L_small_initial_partial_block_rouAoqaCpdDxjzF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 
98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_rouAoqaCpdDxjzF: orq %r8,%r8 je .L_after_reduction_rouAoqaCpdDxjzF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_rouAoqaCpdDxjzF: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_3_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_uctbCqtlugkklDD vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_uctbCqtlugkklDD .L_16_blocks_overflow_uctbCqtlugkklDD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_uctbCqtlugkklDD: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zkAAeakisCCFqgf subq 
$16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zkAAeakisCCFqgf .L_small_initial_partial_block_zkAAeakisCCFqgf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zkAAeakisCCFqgf: orq %r8,%r8 je .L_after_reduction_zkAAeakisCCFqgf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zkAAeakisCCFqgf: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_4_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_gaqeqvovBwleCnk vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_gaqeqvovBwleCnk .L_16_blocks_overflow_gaqeqvovBwleCnk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_gaqeqvovBwleCnk: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 
112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_qihCqAlqxdsjyzm subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_qihCqAlqxdsjyzm .L_small_initial_partial_block_qihCqAlqxdsjyzm: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_qihCqAlqxdsjyzm: orq %r8,%r8 je .L_after_reduction_qihCqAlqxdsjyzm vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_qihCqAlqxdsjyzm: jmp 
.L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_5_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_ocpzeCAdEaCuwqG vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_ocpzeCAdEaCuwqG .L_16_blocks_overflow_ocpzeCAdEaCuwqG: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_ocpzeCAdEaCuwqG: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 
98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GgfcCeubxmwGabf subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GgfcCeubxmwGabf .L_small_initial_partial_block_GgfcCeubxmwGabf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GgfcCeubxmwGabf: orq %r8,%r8 je .L_after_reduction_GgfcCeubxmwGabf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GgfcCeubxmwGabf: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_6_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_tCpEhfGhdbguevv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_tCpEhfGhdbguevv .L_16_blocks_overflow_tCpEhfGhdbguevv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_tCpEhfGhdbguevv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 
1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EcehrEgDvGgGxlr subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
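// Horizontal fold of the GHASH accumulator: the 256-bit halves were XORed
// just above; the 128-bit halves are folded next, leaving a single xmm value
// for the POLY2 reduction that follows.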
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EcehrEgDvGgGxlr .L_small_initial_partial_block_EcehrEgDvGgGxlr: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EcehrEgDvGgGxlr: orq %r8,%r8 je .L_after_reduction_EcehrEgDvGgGxlr vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EcehrEgDvGgGxlr: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_7_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_yhnxntsqCvqmnAv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_yhnxntsqCvqmnAv .L_16_blocks_overflow_yhnxntsqCvqmnAv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_yhnxntsqCvqmnAv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 
96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dpnDdmEjpiBlsff subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dpnDdmEjpiBlsff .L_small_initial_partial_block_dpnDdmEjpiBlsff: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 
224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dpnDdmEjpiBlsff: orq %r8,%r8 je .L_after_reduction_dpnDdmEjpiBlsff vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dpnDdmEjpiBlsff: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_8_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_qhecknjsAigbdvl vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_qhecknjsAigbdvl .L_16_blocks_overflow_qhecknjsAigbdvl: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_qhecknjsAigbdvl: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_agskGinasntEiCl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_agskGinasntEiCl .L_small_initial_partial_block_agskGinasntEiCl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq 
$4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_agskGinasntEiCl: orq %r8,%r8 je .L_after_reduction_agskGinasntEiCl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_agskGinasntEiCl: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_9_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_bFfGEAqbwowecqr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_bFfGEAqbwowecqr .L_16_blocks_overflow_bFfGEAqbwowecqr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_bFfGEAqbwowecqr: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 
$1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xuljsjGkGjfAtFa subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xuljsjGkGjfAtFa .L_small_initial_partial_block_xuljsjGkGjfAtFa: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 
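// Note on the ".byte" runs: the byte after the four-byte EVEX prefix
// (98,...) selects the opcode, and 68 (0x44) is vpclmulqdq, emitted here as
// raw machine code, presumably so the file still assembles with toolchains
// that lack these encodings. This stretch multiplies against POLY2 to reduce
// the 256-bit carry-less product to the final 128-bit GHASH state in %xmm14;
// vpternlogq with immediate 0x96 computes a three-way XOR of its operands.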
vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xuljsjGkGjfAtFa: orq %r8,%r8 je .L_after_reduction_xuljsjGkGjfAtFa vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xuljsjGkGjfAtFa: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_10_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_rvpoAkotkmdfoGD vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_rvpoAkotkmdfoGD .L_16_blocks_overflow_rvpoAkotkmdfoGD: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_rvpoAkotkmdfoGD: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 
$1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_xvxthCnBgzxznFe subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_xvxthCnBgzxznFe .L_small_initial_partial_block_xvxthCnBgzxznFe: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 
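// The extract/XOR ladder that follows folds the 512-bit GHASH accumulators
// horizontally: vextracti64x4 and vextracti32x4 pull the upper halves out
// and XOR them into the lower halves, narrowing 512 -> 256 -> 128 bits
// before the final polynomial reduction against POLY2.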
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xvxthCnBgzxznFe: orq %r8,%r8 je .L_after_reduction_xvxthCnBgzxznFe vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xvxthCnBgzxznFe: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_11_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_vfjpDwaAwwnfAie vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_vfjpDwaAwwnfAie .L_16_blocks_overflow_vfjpDwaAwwnfAie: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_vfjpDwaAwwnfAie: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_aBbqBjAzrxyDsyu subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_aBbqBjAzrxyDsyu .L_small_initial_partial_block_aBbqBjAzrxyDsyu: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 
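// Tail GHASH for the residual blocks: the byte-reflected ciphertext is
// multiplied by the matching slice of precomputed hash-key powers (loaded
// at offsets from %r10, which appears to point into the key-power table)
// using EVEX vpclmulqdq, again emitted as raw ".byte" sequences.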
vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_aBbqBjAzrxyDsyu: orq %r8,%r8 je .L_after_reduction_aBbqBjAzrxyDsyu vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_aBbqBjAzrxyDsyu: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_12_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_sxuCEDavBFjsEdv vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_sxuCEDavBFjsEdv .L_16_blocks_overflow_sxuCEDavBFjsEdv: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_sxuCEDavBFjsEdv: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq 
$0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yChicojCCAAFCdn subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yChicojCCAAFCdn .L_small_initial_partial_block_yChicojCCAAFCdn: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 
98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yChicojCCAAFCdn: orq %r8,%r8 je .L_after_reduction_yChicojCCAAFCdn vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yChicojCCAAFCdn: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_13_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_qqAerGvEyeduCgs vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_qqAerGvEyeduCgs .L_16_blocks_overflow_qqAerGvEyeduCgs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_qqAerGvEyeduCgs: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 
96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zkhmCnldAfcumwl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 
240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zkhmCnldAfcumwl .L_small_initial_partial_block_zkhmCnldAfcumwl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zkhmCnldAfcumwl: orq %r8,%r8 je .L_after_reduction_zkhmCnldAfcumwl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zkhmCnldAfcumwl: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_14_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_tiwfklfdCbEnvFe vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_tiwfklfdCbEnvFe .L_16_blocks_overflow_tiwfklfdCbEnvFe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_tiwfklfdCbEnvFe: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 
640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 
%zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_fFfrqpdqbcvGzmv subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_fFfrqpdqbcvGzmv .L_small_initial_partial_block_fFfrqpdqbcvGzmv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_fFfrqpdqbcvGzmv: orq %r8,%r8 je .L_after_reduction_fFfrqpdqbcvGzmv vpxorq 
%xmm7,%xmm14,%xmm14 .L_after_reduction_fFfrqpdqbcvGzmv: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_15_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_BatgsGhBnhqnqnx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_BatgsGhBnhqnqnx .L_16_blocks_overflow_BatgsGhBnhqnqnx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_BatgsGhBnhqnqnx: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 
$1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ysbBlvhzxEdeEFl subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ysbBlvhzxEdeEFl .L_small_initial_partial_block_ysbBlvhzxEdeEFl: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 
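// Same pattern as the shorter tail paths: opcode byte 68 after the EVEX
// prefix marks vpclmulqdq, and the vpternlogq $0x96 ops fold each pair of
// partial products into the running accumulators; the %k1 mask built from
// byte64_len_to_mask_table guards the final partial 64-byte load and store.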
.byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ysbBlvhzxEdeEFl: orq %r8,%r8 je .L_after_reduction_ysbBlvhzxEdeEFl vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_ysbBlvhzxEdeEFl: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_16_adnhFzpjBkEnjlu: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_CuxvqEazAfGjsCp vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_CuxvqEazAfGjsCp .L_16_blocks_overflow_CuxvqEazAfGjsCp: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_CuxvqEazAfGjsCp: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,115,125,0,68,239,1 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,83,125,0,68,229,0 vpsrldq $4,%xmm12,%xmm12 .byte 98,83,125,0,68,253,16 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_cGvBxlvhpkhxlhv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq 
$0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_cGvBxlvhpkhxlhv: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_cGvBxlvhpkhxlhv: jmp .L_last_blocks_done_adnhFzpjBkEnjlu .L_last_num_blocks_is_0_adnhFzpjBkEnjlu: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_adnhFzpjBkEnjlu: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_keEetjmxflGqBfv .L_message_below_32_blocks_keEetjmxflGqBfv: subq $256,%r8 addq $256,%rax movl %r8d,%r10d leaq 80(%rsi),%r12 testq %r14,%r14 jnz .L_skip_hkeys_precomputation_wDAhpcxxDdecsFn vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 
%zmm4,448(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) .byte 98,243,93,72,68,243,17 .byte 98,243,93,72,68,251,0 .byte 98,115,93,72,68,211,1 .byte 98,243,93,72,68,227,16 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,252,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 .byte 98,243,45,72,68,252,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,228,16 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) .byte 98,243,85,72,68,243,17 .byte 98,243,85,72,68,251,0 .byte 98,115,85,72,68,211,1 .byte 98,243,85,72,68,235,16 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 .byte 98,243,45,72,68,253,1 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 .byte 98,243,45,72,68,253,0 vpsrldq $4,%zmm7,%zmm7 .byte 98,243,45,72,68,237,16 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_wDAhpcxxDdecsFn: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_uGbbotznadbtwnB cmpl $8,%r10d je .L_last_num_blocks_is_8_uGbbotznadbtwnB jb .L_last_num_blocks_is_7_1_uGbbotznadbtwnB cmpl $12,%r10d je .L_last_num_blocks_is_12_uGbbotznadbtwnB jb .L_last_num_blocks_is_11_9_uGbbotznadbtwnB cmpl $15,%r10d je .L_last_num_blocks_is_15_uGbbotznadbtwnB ja .L_last_num_blocks_is_16_uGbbotznadbtwnB cmpl $14,%r10d je .L_last_num_blocks_is_14_uGbbotznadbtwnB jmp .L_last_num_blocks_is_13_uGbbotznadbtwnB .L_last_num_blocks_is_11_9_uGbbotznadbtwnB: cmpl $10,%r10d je .L_last_num_blocks_is_10_uGbbotznadbtwnB ja .L_last_num_blocks_is_11_uGbbotznadbtwnB jmp .L_last_num_blocks_is_9_uGbbotznadbtwnB .L_last_num_blocks_is_7_1_uGbbotznadbtwnB: cmpl $4,%r10d je .L_last_num_blocks_is_4_uGbbotznadbtwnB jb .L_last_num_blocks_is_3_1_uGbbotznadbtwnB cmpl $6,%r10d ja .L_last_num_blocks_is_7_uGbbotznadbtwnB je .L_last_num_blocks_is_6_uGbbotznadbtwnB jmp .L_last_num_blocks_is_5_uGbbotznadbtwnB .L_last_num_blocks_is_3_1_uGbbotznadbtwnB: cmpl $2,%r10d ja .L_last_num_blocks_is_3_uGbbotznadbtwnB je .L_last_num_blocks_is_2_uGbbotznadbtwnB .L_last_num_blocks_is_1_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_cChwjnmCkfzrqax vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_cChwjnmCkfzrqax .L_16_blocks_overflow_cChwjnmCkfzrqax: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_cChwjnmCkfzrqax: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 
98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,8,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,8,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%xmm17{%k1}{z} .byte 98,146,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,8,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,8,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,8,220,199 .byte 98,146,125,8,221,198 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_EctpihchBbzjuhh subq $16,%r8 movl $0,(%rdx) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EctpihchBbzjuhh .L_small_initial_partial_block_EctpihchBbzjuhh: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 .byte 98,147,125,8,68,217,1 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 .byte 98,243,125,8,68,227,0 vpsrldq $4,%xmm4,%xmm4 .byte 98,115,125,8,68,243,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_EctpihchBbzjuhh .L_small_initial_compute_done_EctpihchBbzjuhh: 
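# NOTE: reader's commentary; the register and memory roles here are inferred
# from context, not stated by the source.  On these tail paths the remaining
# byte count in %r8 appears to be saved to (%rdx) and the last counter or
# ciphertext block to 16(%rsi), so that a short final block can be completed
# on a later call.  When a partial block remains (%r8 != 0), the
# byte-reflected last block in %xmm7 is XORed into the GHASH accumulator
# %xmm14, since addition in GF(2^128) is plain XOR.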
.L_after_reduction_EctpihchBbzjuhh: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_2_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_DqmtBvrcgAqmgdw vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_DqmtBvrcgAqmgdw .L_16_blocks_overflow_DqmtBvrcgAqmgdw: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_DqmtBvrcgAqmgdw: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,40,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,40,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%ymm17{%k1}{z} .byte 98,146,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,40,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,40,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,40,220,199 .byte 98,146,125,40,221,198 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EyrkoajdjakxzkF subq $16,%r8 movl $0,(%rdx) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EyrkoajdjakxzkF .L_small_initial_partial_block_EyrkoajdjakxzkF: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%r10),%xmm1 .byte 98,243,117,0,68,225,1 .byte 98,243,117,0,68,233,16 .byte 98,243,117,0,68,193,17 .byte 98,243,117,0,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EyrkoajdjakxzkF: orq %r8,%r8 je .L_after_reduction_EyrkoajdjakxzkF vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EyrkoajdjakxzkF: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_3_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_hfDuCGGGEpbgAAo vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_hfDuCGGGEpbgAAo .L_16_blocks_overflow_hfDuCGGGEpbgAAo: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_hfDuCGGGEpbgAAo: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 
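# NOTE: the .byte runs throughout this file are hand-encoded EVEX
# instructions, presumably so the file assembles even without VAES and
# VPCLMULQDQ mnemonic support: 0x62 (98) opens the EVEX prefix, opcode
# 0xdc (220) is vaesenc, 0xdd (221) is vaesenclast, and 0x44 (68) is
# vpclmulqdq, whose trailing immediate selects the 64x64 halves
# (0x00 lo*lo, 0x11 hi*hi, 0x01 and 0x10 the cross products).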
.byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_GfsyFzqqokxFwFx subq $16,%r8 movl $0,(%rdx) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_GfsyFzqqokxFwFx .L_small_initial_partial_block_GfsyFzqqokxFwFx: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%r10),%ymm1 .byte 98,243,117,32,68,225,1 .byte 98,243,117,32,68,233,16 .byte 98,243,117,32,68,193,17 .byte 98,243,117,32,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_GfsyFzqqokxFwFx: orq %r8,%r8 je .L_after_reduction_GfsyFzqqokxFwFx vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_GfsyFzqqokxFwFx: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_4_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 kmovq (%r10,%r11,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_wxaujbwbDrFxuhe vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_wxaujbwbDrFxuhe .L_16_blocks_overflow_wxaujbwbDrFxuhe: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_wxaujbwbDrFxuhe: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 vbroadcastf64x2 64(%rdi),%zmm30 .byte 
98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17{%k1}{z} .byte 98,146,125,72,220,199 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,125,72,221,198 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 leaq 80(%rsi),%r10 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_tpadbsuBdepEgig subq $16,%r8 movl $0,(%rdx) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_tpadbsuBdepEgig .L_small_initial_partial_block_tpadbsuBdepEgig: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_tpadbsuBdepEgig: orq %r8,%r8 je .L_after_reduction_tpadbsuBdepEgig vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_tpadbsuBdepEgig: jmp .L_last_blocks_done_uGbbotznadbtwnB 
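# NOTE: reader's sketch of the tail dispatch around these cases.  %r10d holds
# ceil(remaining/16) in 1..16 and a short cmp/ja/jb tree picks one
# .L_last_num_blocks_is_N case.  Each case builds the byte mask for the
# final, possibly short, 64-byte vector from byte64_len_to_mask_table;
# cases 5..8 first subtract 64 from the length (9..12 subtract 128, and so
# on) because the preceding vectors are processed unmasked at full width.
# The cmpl $0xNN,%r15d checks guard the counter increment: the fast path adds
# a pre-swapped increment (%zmm28/%ymm27/%xmm27) in place, while the overflow
# path byte-swaps via %zmm29, adds ddq_add_1234/ddq_add_4444 with full 32-bit
# carries, and swaps back.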
.L_last_num_blocks_is_5_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_tEuoxeaCCDdhEFB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_tEuoxeaCCDdhEFB .L_16_blocks_overflow_tEuoxeaCCDdhEFB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_tEuoxeaCCDdhEFB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%xmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,8,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,8,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,8,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %xmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_srvwxdEwmxFwfhg subq $16,%r8 movl $0,(%rdx) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 
98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_srvwxdEwmxFwfhg .L_small_initial_partial_block_srvwxdEwmxFwfhg: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_srvwxdEwmxFwfhg: orq %r8,%r8 je .L_after_reduction_srvwxdEwmxFwfhg vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_srvwxdEwmxFwfhg: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_6_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_prosxFkubabgvzg vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_prosxFkubabgvzg .L_16_blocks_overflow_prosxFkubabgvzg: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_prosxFkubabgvzg: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 80(%rdi),%zmm31 
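# NOTE: imm8 0x96 encodes the three-input XOR truth table (A ^ B ^ C), so
# each vpternlogq $0x96 in these sequences folds two carry-less partial
# products into an accumulator with one instruction instead of two vpxorq;
# the GHASH multiplies are deliberately interleaved with the vaesenc rounds
# so their latencies overlap.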
vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%ymm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,40,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,40,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,40,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %ymm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_hzkBexFaxhsbChs subq $16,%r8 movl $0,(%rdx) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_hzkBexFaxhsbChs .L_small_initial_partial_block_hzkBexFaxhsbChs: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 240(%r10),%xmm1 .byte 98,243,101,0,68,225,1 .byte 98,243,101,0,68,233,16 .byte 98,243,101,0,68,193,17 .byte 98,243,101,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_hzkBexFaxhsbChs: orq %r8,%r8 je .L_after_reduction_hzkBexFaxhsbChs vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_hzkBexFaxhsbChs: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_7_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_aeeqyBehlbvnfnk vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_aeeqyBehlbvnfnk .L_16_blocks_overflow_aeeqyBehlbvnfnk: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_aeeqyBehlbvnfnk: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_AobapGtdiluagwq subq $16,%r8 movl $0,(%rdx) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_AobapGtdiluagwq .L_small_initial_partial_block_AobapGtdiluagwq: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 224(%r10),%ymm1 .byte 98,243,101,32,68,225,1 .byte 98,243,101,32,68,233,16 .byte 98,243,101,32,68,193,17 .byte 98,243,101,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_AobapGtdiluagwq: orq %r8,%r8 je .L_after_reduction_AobapGtdiluagwq vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_AobapGtdiluagwq: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_8_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $64,%r11 kmovq (%r10,%r11,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_rboylvBCxohyFxr vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_rboylvBCxohyFxr .L_16_blocks_overflow_rboylvBCxohyFxr: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_rboylvBCxohyFxr: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 leaq 80(%rsi),%r10 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_iitjdnjexGtAzlA subq $16,%r8 movl $0,(%rdx) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_iitjdnjexGtAzlA .L_small_initial_partial_block_iitjdnjexGtAzlA: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%r10),%zmm1 .byte 98,115,117,64,68,193,17 .byte 98,227,117,64,68,241,0 .byte 98,99,117,64,68,241,1 .byte 98,99,117,64,68,249,16 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,101,64,68,225,1 .byte 98,243,101,64,68,233,16 .byte 98,243,101,64,68,193,17 .byte 98,243,101,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_iitjdnjexGtAzlA: orq %r8,%r8 je .L_after_reduction_iitjdnjexGtAzlA vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_iitjdnjexGtAzlA: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_9_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_kwzbcrnlszssDoA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_kwzbcrnlszssDoA .L_16_blocks_overflow_kwzbcrnlszssDoA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_kwzbcrnlszssDoA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 
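# NOTE: the masked vmovdqu8 ...{%k1}{z} loads below fetch the message tail;
# {z} zeroes the unselected bytes, which keeps them neutral for both the CTR
# keystream XOR and the GHASH accumulation.  The later register-to-register
# "vmovdqu8 %zmm20,%zmm20{%k1}{z}" re-applies the mask so that no bytes
# beyond the end of the message can reach the hash.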
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%xmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,8,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,8,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,8,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %xmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_EFvfhGiioywrajC subq $16,%r8 movl $0,(%rdx) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_EFvfhGiioywrajC .L_small_initial_partial_block_EFvfhGiioywrajC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 192(%r10),%zmm1 .byte 98,115,101,64,68,193,17 
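# NOTE: inferred from the offsets rather than stated in the source: %r10
# (set by leaq 80(%rsi),%r10) points at a table of hash-key powers with H^1
# at offset 240 and higher powers at lower offsets, so an n-block tail starts
# its multiplies at 240-16*(n-1)(%r10).  Block i is paired with H^(n-i),
# which lets a single fold-and-reduce at the end yield the running GHASH.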
.byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_EFvfhGiioywrajC: orq %r8,%r8 je .L_after_reduction_EFvfhGiioywrajC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_EFvfhGiioywrajC: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_10_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_hrbjfpBdCjiGnfs vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_hrbjfpBdCjiGnfs .L_16_blocks_overflow_hrbjfpBdCjiGnfs: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_hrbjfpBdCjiGnfs: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%ymm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 128(%rdi),%zmm30 
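# NOTE: reader's sketch of the AES pipeline in these paths.  The expanded
# round keys sit at 0..224(%rdi) in 16-byte steps, and vbroadcastf64x2
# replicates each across all 128-bit lanes so one vaesenc advances four
# counter blocks per zmm.  As emitted here, the paths whiten with key 0, run
# vaesenc for 16..208(%rdi), and finish with vaesenclast on 224(%rdi), a
# 14-round schedule as used by AES-256.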
vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,40,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,40,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,40,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %ymm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_FvsGiljtuwAeGuy subq $16,%r8 movl $0,(%rdx) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_FvsGiljtuwAeGuy .L_small_initial_partial_block_FvsGiljtuwAeGuy: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 176(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,93,0,68,225,1 .byte 98,243,93,0,68,233,16 .byte 98,243,93,0,68,193,17 .byte 98,243,93,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 
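# NOTE: a reading of the reduction that follows, assuming the usual AVX-512
# GHASH constants.  The wide partial products are first folded to 128 bits
# (vextracti64x4/vextracti32x4 plus XOR, valid because GF(2^128) addition is
# XOR), then the resulting 256-bit product is reduced modulo
# g(x) = x^128 + x^7 + x^2 + x + 1 in two phases with POLY2: a vpclmulqdq
# (imm 0x01) and shift fold the high quadword, a 0x00/0x10 pair folds the
# next, and the closing vpternlogq $0x96 XORs both reduction terms into the
# low 128 bits, leaving the updated GHASH state in %xmm14.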
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_FvsGiljtuwAeGuy: orq %r8,%r8 je .L_after_reduction_FvsGiljtuwAeGuy vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_FvsGiljtuwAeGuy: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_11_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_gffyuiCaEymxbgx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_gffyuiCaEymxbgx .L_16_blocks_overflow_gffyuiCaEymxbgx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_gffyuiCaEymxbgx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 
98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_yskGpojraEjuoeD subq $16,%r8 movl $0,(%rdx) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_yskGpojraEjuoeD .L_small_initial_partial_block_yskGpojraEjuoeD: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 96(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 160(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,93,32,68,225,1 .byte 98,243,93,32,68,233,16 .byte 98,243,93,32,68,193,17 .byte 98,243,93,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_yskGpojraEjuoeD: orq %r8,%r8 je .L_after_reduction_yskGpojraEjuoeD vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_yskGpojraEjuoeD: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_12_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $128,%r11 kmovq (%r10,%r11,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_hAjEfcezvfywBbB vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_hAjEfcezvfywBbB .L_16_blocks_overflow_hAjEfcezvfywBbB: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_hAjEfcezvfywBbB: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 176(%rdi),%zmm31 .byte 
98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 leaq 80(%rsi),%r10 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_dkgezBnEGtEnaGC subq $16,%r8 movl $0,(%rdx) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_dkgezBnEGtEnaGC .L_small_initial_partial_block_dkgezBnEGtEnaGC: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 80(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 144(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,93,64,68,225,1 .byte 98,243,93,64,68,233,16 .byte 98,243,93,64,68,193,17 .byte 98,243,93,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_dkgezBnEGtEnaGC: orq %r8,%r8 je .L_after_reduction_dkgezBnEGtEnaGC vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_dkgezBnEGtEnaGC: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_13_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_jsBqmgCzCrGvyyA vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_jsBqmgCzCrGvyyA .L_16_blocks_overflow_jsBqmgCzCrGvyyA: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_jsBqmgCzCrGvyyA: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%xmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 
98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,8,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,8,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,8,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %xmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_gEgFBvntjyjbGji subq $16,%r8 movl $0,(%rdx) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_gEgFBvntjyjbGji .L_small_initial_partial_block_gEgFBvntjyjbGji: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 64(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 128(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 192(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq 
%zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_gEgFBvntjyjbGji: orq %r8,%r8 je .L_after_reduction_gEgFBvntjyjbGji vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_gEgFBvntjyjbGji: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_14_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_muGuwhaFlxCtAii vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_muGuwhaFlxCtAii .L_16_blocks_overflow_muGuwhaFlxCtAii: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_muGuwhaFlxCtAii: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%ymm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 
98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,40,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,40,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,40,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %ymm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_zumDfmCofGawimf subq $16,%r8 movl $0,(%rdx) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_zumDfmCofGawimf .L_small_initial_partial_block_zumDfmCofGawimf: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 48(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 
98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 112(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 176(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 240(%r10),%xmm1 .byte 98,243,85,0,68,225,1 .byte 98,243,85,0,68,233,16 .byte 98,243,85,0,68,193,17 .byte 98,243,85,0,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_zumDfmCofGawimf: orq %r8,%r8 je .L_after_reduction_zumDfmCofGawimf vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_zumDfmCofGawimf: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_15_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_EpbiipkiGBkrvEx vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_EpbiipkiGBkrvEx .L_16_blocks_overflow_EpbiipkiGBkrvEx: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_EpbiipkiGBkrvEx: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_wohgmjgdAjDrcfv subq $16,%r8 movl $0,(%rdx) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 
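// Every .L_last_num_blocks_is_N handler follows the same template: the
// low counter byte (apparently held in %r15d) is compared against 256-N
// to decide whether the counter blocks can be built with a plain vpaddd
// or must take the byte-swapped overflow path via ddq_add_1234 and
// ddq_add_4444; the final partial block is loaded and stored through
// mask %k1, looked up in byte64_len_to_mask_table from the remaining
// length in %r8; and the leftover-block state appears to be saved
// through (%rdx) and 16(%rsi) for the partial-block case.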
vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_wohgmjgdAjDrcfv .L_small_initial_partial_block_wohgmjgdAjDrcfv: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 32(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 96(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 98,99,101,64,68,249,16 vmovdqu64 160(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 224(%r10),%ymm1 .byte 98,243,85,32,68,225,1 .byte 98,243,85,32,68,233,16 .byte 98,243,85,32,68,193,17 .byte 98,243,85,32,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_wohgmjgdAjDrcfv: orq %r8,%r8 je .L_after_reduction_wohgmjgdAjDrcfv vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_wohgmjgdAjDrcfv: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_16_uGbbotznadbtwnB: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r11 subq $192,%r11 kmovq (%r10,%r11,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_etuAklrEovqCDpq vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_etuAklrEovqCDpq .L_16_blocks_overflow_etuAklrEovqCDpq: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_etuAklrEovqCDpq: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 .byte 98,115,61,72,68,241,17 .byte 98,243,61,72,68,249,0 .byte 98,115,61,72,68,209,1 .byte 98,115,61,72,68,217,16 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 .byte 98,146,125,72,220,199 .byte 
98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 48(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 64(%rdi),%zmm30 .byte 98,227,61,72,68,225,16 .byte 98,227,61,72,68,233,1 .byte 98,227,61,72,68,201,17 .byte 98,227,61,72,68,217,0 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%rax,1),%zmm17 vmovdqu8 64(%rcx,%rax,1),%zmm19 vmovdqu8 128(%rcx,%rax,1),%zmm20 vmovdqu8 192(%rcx,%rax,1),%zmm21{%k1}{z} .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 112(%rdi),%zmm31 .byte 98,51,77,64,68,250,16 .byte 98,163,77,64,68,194,1 .byte 98,51,77,64,68,226,17 .byte 98,51,77,64,68,234,0 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 144(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 160(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 176(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 192(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 vbroadcastf64x2 208(%rdi),%zmm31 .byte 98,146,125,72,220,198 .byte 98,146,101,72,220,222 .byte 98,146,93,72,220,230 .byte 98,146,85,72,220,238 vbroadcastf64x2 224(%rdi),%zmm30 .byte 98,146,125,72,220,199 .byte 98,146,101,72,220,223 .byte 98,146,93,72,220,231 .byte 98,146,85,72,220,239 .byte 98,146,125,72,221,198 .byte 98,146,101,72,221,222 .byte 98,146,93,72,221,230 .byte 98,146,85,72,221,238 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%rax,1) vmovdqu8 %zmm3,64(%r10,%rax,1) vmovdqu8 %zmm4,128(%r10,%rax,1) vmovdqu8 %zmm5,192(%r10,%rax,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 leaq 80(%rsi),%r10 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_xGuCpnrvibyoyay: movl %r8d,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 16(%r10),%zmm1 .byte 98,243,117,64,68,193,17 .byte 98,243,117,64,68,217,0 .byte 98,243,117,64,68,225,1 .byte 98,243,117,64,68,233,16 vmovdqu64 80(%r10),%zmm1 .byte 98,115,101,64,68,193,17 .byte 98,227,101,64,68,241,0 .byte 98,99,101,64,68,241,1 .byte 
98,99,101,64,68,249,16 vmovdqu64 144(%r10),%zmm1 .byte 98,227,93,64,68,201,17 .byte 98,227,93,64,68,217,0 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 .byte 98,227,93,64,68,201,1 .byte 98,227,93,64,68,217,16 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 208(%r10),%ymm1 vinserti64x2 $2,240(%r10),%zmm1,%zmm1 .byte 98,243,85,64,68,225,1 .byte 98,243,85,64,68,233,16 .byte 98,243,85,64,68,193,17 .byte 98,243,85,64,68,217,0 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 .byte 98,243,117,8,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,117,8,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,117,8,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_xGuCpnrvibyoyay: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_xGuCpnrvibyoyay: jmp .L_last_blocks_done_uGbbotznadbtwnB .L_last_num_blocks_is_0_uGbbotznadbtwnB: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,196,17 .byte 98,211,21,72,68,220,0 .byte 98,211,21,72,68,228,1 .byte 98,211,21,72,68,236,16 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 .byte 98,211,21,72,68,244,17 .byte 98,211,21,72,68,252,0 .byte 98,83,21,72,68,212,1 .byte 98,83,21,72,68,220,16 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 .byte 98,147,93,8,68,193,1 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 .byte 98,243,93,8,68,216,0 vpsrldq $4,%xmm3,%xmm3 .byte 98,115,93,8,68,240,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_uGbbotznadbtwnB: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_keEetjmxflGqBfv .L_message_below_equal_16_blocks_keEetjmxflGqBfv: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_vkDeiBlhaznkthD jl .L_small_initial_num_blocks_is_7_1_vkDeiBlhaznkthD cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_vkDeiBlhaznkthD jl .L_small_initial_num_blocks_is_11_9_vkDeiBlhaznkthD cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_vkDeiBlhaznkthD cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_vkDeiBlhaznkthD cmpq $14,%r12 je 
.L_small_initial_num_blocks_is_14_vkDeiBlhaznkthD jmp .L_small_initial_num_blocks_is_13_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_11_9_vkDeiBlhaznkthD: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_vkDeiBlhaznkthD cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_vkDeiBlhaznkthD jmp .L_small_initial_num_blocks_is_9_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_7_1_vkDeiBlhaznkthD: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_vkDeiBlhaznkthD jl .L_small_initial_num_blocks_is_3_1_vkDeiBlhaznkthD cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_vkDeiBlhaznkthD cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_vkDeiBlhaznkthD jmp .L_small_initial_num_blocks_is_5_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_3_1_vkDeiBlhaznkthD: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_vkDeiBlhaznkthD cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_1_vkDeiBlhaznkthD: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONEa(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%rax,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,8,220,199 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,8,221,199 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 leaq 80(%rsi),%r10 cmpq $16,%r8 jl .L_small_initial_partial_block_pelykqxdehCqvkk subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_pelykqxdehCqvkk .L_small_initial_partial_block_pelykqxdehCqvkk: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_pelykqxdehCqvkk .L_small_initial_compute_done_pelykqxdehCqvkk: .L_after_reduction_pelykqxdehCqvkk: jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_2_vkDeiBlhaznkthD: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 
$0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%rax,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 48(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 64(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 80(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 96(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 112(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 128(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 144(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 160(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 176(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 192(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 208(%rdi),%zmm15 .byte 98,210,125,40,220,199 vbroadcastf64x2 224(%rdi),%zmm15 .byte 98,210,125,40,221,199 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%rax,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 leaq 80(%rsi),%r10 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_ldyuFtpzipDvehA subq $16,%r8 movl $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%r10),%ymm20 .byte 98,179,77,40,68,228,1 .byte 98,179,77,40,68,236,16 .byte 98,179,77,40,68,196,17 .byte 98,179,77,40,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_ldyuFtpzipDvehA .L_small_initial_partial_block_ldyuFtpzipDvehA: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%r10),%xmm20 .byte 98,179,77,8,68,228,1 .byte 98,179,77,8,68,236,16 .byte 98,179,77,8,68,196,17 .byte 98,179,77,8,68,220,0 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_ldyuFtpzipDvehA: orq %r8,%r8 je .L_after_reduction_ldyuFtpzipDvehA vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_ldyuFtpzipDvehA: jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_3_vkDeiBlhaznkthD: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq 
(%r10,%r15,8),%k1
vextracti32x4 $2,%zmm0,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
vpxorq %zmm6,%zmm0,%zmm0
vextracti32x4 $2,%zmm0,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vextracti32x4 $2,%zmm6,%xmm13
leaq 80(%rsi),%r10
subq $16 * (3 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_trwwageihBqcfkh
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_trwwageihBqcfkh
.L_small_initial_partial_block_trwwageihBqcfkh:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 224(%r10),%ymm20
.byte 98,179,77,40,68,228,1
.byte 98,179,77,40,68,236,16
.byte 98,179,77,40,68,196,17
.byte 98,179,77,40,68,220,0
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_trwwageihBqcfkh:
orq %r8,%r8
je .L_after_reduction_trwwageihBqcfkh
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_trwwageihBqcfkh:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_4_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $3,%zmm0,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vmovdqu8 0(%rcx,%rax,1),%zmm6{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
vpxorq %zmm6,%zmm0,%zmm0
vextracti32x4 $3,%zmm0,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1){%k1}
vmovdqu8 %zmm0,%zmm0{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vextracti32x4 $3,%zmm6,%xmm13
leaq 80(%rsi),%r10
subq $16 * (4 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_yotsdxeGEAxlmrj
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 192(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_yotsdxeGEAxlmrj
.L_small_initial_partial_block_yotsdxeGEAxlmrj:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_yotsdxeGEAxlmrj:
orq %r8,%r8
je .L_after_reduction_yotsdxeGEAxlmrj
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_yotsdxeGEAxlmrj:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_5_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $64,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $0,%zmm3,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %xmm29,%xmm3,%xmm3
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%xmm7{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %xmm15,%xmm3,%xmm3
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,8,220,223
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,8,221,223
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %xmm7,%xmm3,%xmm3
vextracti32x4 $0,%zmm3,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %xmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %xmm29,%xmm7,%xmm7
vextracti32x4 $0,%zmm7,%xmm13
leaq 80(%rsi),%r10
subq $16 * (5 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_rmwxtkgdnBhEnAk
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 176(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vmovdqu64 240(%r10),%xmm20
.byte 98,179,69,8,68,228,1
.byte 98,179,69,8,68,236,16
.byte 98,179,69,8,68,196,17
.byte 98,179,69,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_rmwxtkgdnBhEnAk
.L_small_initial_partial_block_rmwxtkgdnBhEnAk:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 192(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_rmwxtkgdnBhEnAk:
orq %r8,%r8
je .L_after_reduction_rmwxtkgdnBhEnAk
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_rmwxtkgdnBhEnAk:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_6_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $64,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $1,%zmm3,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %ymm29,%ymm3,%ymm3
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%ymm7{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %ymm15,%ymm3,%ymm3
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,40,220,223
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,40,221,223
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %ymm7,%ymm3,%ymm3
vextracti32x4 $1,%zmm3,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %ymm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %ymm29,%ymm7,%ymm7
vextracti32x4 $1,%zmm7,%xmm13
leaq 80(%rsi),%r10
subq $16 * (6 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_hGvwkbDFDGzDyAp
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 160(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vmovdqu64 224(%r10),%ymm20
.byte 98,179,69,40,68,228,1
.byte 98,179,69,40,68,236,16
.byte 98,179,69,40,68,196,17
.byte 98,179,69,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_hGvwkbDFDGzDyAp
.L_small_initial_partial_block_hGvwkbDFDGzDyAp:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 176(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vmovdqu64 240(%r10),%xmm20
.byte 98,179,69,8,68,228,1
.byte 98,179,69,8,68,236,16
.byte 98,179,69,8,68,196,17
.byte 98,179,69,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_hGvwkbDFDGzDyAp:
orq %r8,%r8
je .L_after_reduction_hGvwkbDFDGzDyAp
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_hGvwkbDFDGzDyAp:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_7_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $64,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $2,%zmm3,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vextracti32x4 $2,%zmm3,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vextracti32x4 $2,%zmm7,%xmm13
leaq 80(%rsi),%r10
subq $16 * (7 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_FchyqAlDxAtkgym
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 144(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,69,72,68,228,1
.byte 98,179,69,72,68,236,16
.byte 98,179,69,72,68,196,17
.byte 98,179,69,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_FchyqAlDxAtkgym
.L_small_initial_partial_block_FchyqAlDxAtkgym:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 160(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vmovdqu64 224(%r10),%ymm20
.byte 98,179,69,40,68,228,1
.byte 98,179,69,40,68,236,16
.byte 98,179,69,40,68,196,17
.byte 98,179,69,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_FchyqAlDxAtkgym:
orq %r8,%r8
je .L_after_reduction_FchyqAlDxAtkgym
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_FchyqAlDxAtkgym:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_8_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $64,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $3,%zmm3,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vextracti32x4 $3,%zmm3,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1){%k1}
vmovdqu8 %zmm3,%zmm3{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vextracti32x4 $3,%zmm7,%xmm13
leaq 80(%rsi),%r10
subq $16 * (8 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_ChlBCihfFcxfpre
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 128(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 192(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_ChlBCihfFcxfpre
.L_small_initial_partial_block_ChlBCihfFcxfpre:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 144(%r10),%zmm20
.byte 98,51,77,72,68,252,17
.byte 98,163,77,72,68,196,0
.byte 98,163,77,72,68,204,1
.byte 98,163,77,72,68,220,16
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,69,72,68,228,1
.byte 98,179,69,72,68,236,16
.byte 98,179,69,72,68,196,17
.byte 98,179,69,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_ChlBCihfFcxfpre:
orq %r8,%r8
je .L_after_reduction_ChlBCihfFcxfpre
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_ChlBCihfFcxfpre:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_9_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
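/* Editor's annotation (not generated output): each
 * .L_small_initial_num_blocks_is_N arm handles a final run of N (1..16)
 * 16-byte blocks.  The arm appears to build N counter blocks from the
 * ddq_add_* constants, AES-encrypt them with the round keys at
 * 0..224(%rdi), XOR in the input text, and fold the result into the GHASH
 * accumulator in %xmm14.  The byte64_len_to_mask_table lookup loaded into
 * %k1 masks the partial final 64-byte load and store. */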
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $128,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $0,%zmm4,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %xmm29,%xmm4,%xmm4
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%xmm10{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %xmm15,%xmm4,%xmm4
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,8,220,231
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,8,221,231
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %xmm10,%xmm4,%xmm4
vextracti32x4 $0,%zmm4,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %xmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %xmm29,%xmm10,%xmm10
vextracti32x4 $0,%zmm10,%xmm13
leaq 80(%rsi),%r10
subq $16 * (9 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_trxojfuEtotExGB
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 112(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 176(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vmovdqu64 240(%r10),%xmm20
.byte 98,179,45,8,68,228,1
.byte 98,179,45,8,68,236,16
.byte 98,179,45,8,68,196,17
.byte 98,179,45,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_trxojfuEtotExGB
.L_small_initial_partial_block_trxojfuEtotExGB:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 128(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 192(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_trxojfuEtotExGB:
orq %r8,%r8
je .L_after_reduction_trxojfuEtotExGB
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_trxojfuEtotExGB:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_10_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $128,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $1,%zmm4,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %ymm29,%ymm4,%ymm4
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%ymm10{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %ymm15,%ymm4,%ymm4
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,40,220,231
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,40,221,231
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %ymm10,%ymm4,%ymm4
vextracti32x4 $1,%zmm4,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %ymm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %ymm29,%ymm10,%ymm10
vextracti32x4 $1,%zmm10,%xmm13
leaq 80(%rsi),%r10
subq $16 * (10 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_zdivCCwEFvrsaiu
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 96(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 160(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vmovdqu64 224(%r10),%ymm20
.byte 98,179,45,40,68,228,1
.byte 98,179,45,40,68,236,16
.byte 98,179,45,40,68,196,17
.byte 98,179,45,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_zdivCCwEFvrsaiu
.L_small_initial_partial_block_zdivCCwEFvrsaiu:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 112(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 176(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vmovdqu64 240(%r10),%xmm20
.byte 98,179,45,8,68,228,1
.byte 98,179,45,8,68,236,16
.byte 98,179,45,8,68,196,17
.byte 98,179,45,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_zdivCCwEFvrsaiu:
orq %r8,%r8
je .L_after_reduction_zdivCCwEFvrsaiu
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_zdivCCwEFvrsaiu:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_11_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $128,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $2,%zmm4,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vextracti32x4 $2,%zmm4,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vextracti32x4 $2,%zmm10,%xmm13
leaq 80(%rsi),%r10
subq $16 * (11 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_cbByewwahwBzpzx
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 80(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 144(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,45,72,68,228,1
.byte 98,179,45,72,68,236,16
.byte 98,179,45,72,68,196,17
.byte 98,179,45,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_cbByewwahwBzpzx
.L_small_initial_partial_block_cbByewwahwBzpzx:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 96(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 160(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vmovdqu64 224(%r10),%ymm20
.byte 98,179,45,40,68,228,1
.byte 98,179,45,40,68,236,16
.byte 98,179,45,40,68,196,17
.byte 98,179,45,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_cbByewwahwBzpzx:
orq %r8,%r8
je .L_after_reduction_cbByewwahwBzpzx
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_cbByewwahwBzpzx:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_12_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $128,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $3,%zmm4,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
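/* Editor's annotation: the .byte sequences throughout are pre-encoded EVEX
 * instructions emitted as raw bytes, presumably so assemblers without
 * VAES/VPCLMULQDQ mnemonic support can still build the file.  0x62 (98)
 * opens an EVEX prefix; opcode 0xDC (220) is vaesenc, 0xDD (221) is
 * vaesenclast, and 0x44 (68) is vpclmulqdq, whose trailing immediate
 * selects the 64-bit halves multiplied ($0x00/$0x11 for the low/high
 * products, $0x01/$0x10 for the cross terms). */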
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vextracti32x4 $3,%zmm4,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1){%k1}
vmovdqu8 %zmm4,%zmm4{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vextracti32x4 $3,%zmm10,%xmm13
leaq 80(%rsi),%r10
subq $16 * (12 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_vpiEDoFuFgdvCsg
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 64(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 128(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 192(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_vpiEDoFuFgdvCsg
.L_small_initial_partial_block_vpiEDoFuFgdvCsg:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 80(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 144(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vpxorq %zmm15,%zmm0,%zmm15
vpxorq %zmm16,%zmm3,%zmm16
vpxorq %zmm17,%zmm4,%zmm17
vpxorq %zmm19,%zmm5,%zmm19
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,45,72,68,228,1
.byte 98,179,45,72,68,236,16
.byte 98,179,45,72,68,196,17
.byte 98,179,45,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_vpiEDoFuFgdvCsg:
orq %r8,%r8
je .L_after_reduction_vpiEDoFuFgdvCsg
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_vpiEDoFuFgdvCsg:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_13_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
vpaddd ddq_add_8888(%rip),%zmm3,%zmm5
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $192,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $0,%zmm5,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %xmm29,%xmm5,%xmm5
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10
vmovdqu8 192(%rcx,%rax,1),%xmm11{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vpxorq %xmm15,%xmm5,%xmm5
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,8,220,239
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,8,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %xmm11,%xmm5,%xmm5
vextracti32x4 $0,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %xmm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %xmm29,%xmm11,%xmm11
vextracti32x4 $0,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (13 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_pkeazcEqwkcpavG
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 48(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 112(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 176(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 240(%r10),%xmm20
.byte 98,179,37,8,68,228,1
.byte 98,179,37,8,68,236,16
.byte 98,179,37,8,68,196,17
.byte 98,179,37,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_pkeazcEqwkcpavG
.L_small_initial_partial_block_pkeazcEqwkcpavG:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 64(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 128(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 192(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vpxorq %zmm19,%zmm17,%zmm17
vpsrldq $8,%zmm17,%zmm4
vpslldq $8,%zmm17,%zmm5
vpxorq %zmm4,%zmm15,%zmm0
vpxorq %zmm5,%zmm16,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_pkeazcEqwkcpavG:
orq %r8,%r8
je .L_after_reduction_pkeazcEqwkcpavG
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_pkeazcEqwkcpavG:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
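/* Editor's annotation: every arm ends in the same GHASH tail.  The
 * carryless products against the precomputed H powers are XOR-summed, the
 * 512-bit partial sums are folded to 128 bits with vextracti64x4 and
 * vextracti32x4 plus vpxorq, and the 256-bit result is reduced modulo the
 * GCM polynomial using the folded constant at POLY2(%rip).  vpternlogq with
 * immediate $0x96 is a three-way XOR (truth table 0x96 = a ^ b ^ c). */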
.L_small_initial_num_blocks_is_14_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
vpaddd ddq_add_8888(%rip),%zmm3,%zmm5
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $192,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $1,%zmm5,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %ymm29,%ymm5,%ymm5
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10
vmovdqu8 192(%rcx,%rax,1),%ymm11{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vpxorq %ymm15,%ymm5,%ymm5
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,40,220,239
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,40,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %ymm11,%ymm5,%ymm5
vextracti32x4 $1,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %ymm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %ymm29,%ymm11,%ymm11
vextracti32x4 $1,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (14 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_GsuCukqqbwGpxDi
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 32(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 96(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 160(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 224(%r10),%ymm20
.byte 98,179,37,40,68,228,1
.byte 98,179,37,40,68,236,16
.byte 98,179,37,40,68,196,17
.byte 98,179,37,40,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
jmp .L_small_initial_compute_done_GsuCukqqbwGpxDi
.L_small_initial_partial_block_GsuCukqqbwGpxDi:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 48(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 112(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 176(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 240(%r10),%xmm20
.byte 98,179,37,8,68,228,1
.byte 98,179,37,8,68,236,16
.byte 98,179,37,8,68,196,17
.byte 98,179,37,8,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_GsuCukqqbwGpxDi:
orq %r8,%r8
je .L_after_reduction_GsuCukqqbwGpxDi
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_GsuCukqqbwGpxDi:
jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD
.L_small_initial_num_blocks_is_15_vkDeiBlhaznkthD:
vmovdqa64 SHUF_MASK(%rip),%zmm29
vshufi64x2 $0,%zmm2,%zmm2,%zmm2
vpaddd ddq_add_1234(%rip),%zmm2,%zmm0
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3
vpaddd ddq_add_8888(%rip),%zmm0,%zmm4
vpaddd ddq_add_8888(%rip),%zmm3,%zmm5
leaq byte64_len_to_mask_table(%rip),%r10
movq %r8,%r15
subq $192,%r15
kmovq (%r10,%r15,8),%k1
vextracti32x4 $2,%zmm5,%xmm2
vpshufb %zmm29,%zmm0,%zmm0
vpshufb %zmm29,%zmm3,%zmm3
vpshufb %zmm29,%zmm4,%zmm4
vpshufb %zmm29,%zmm5,%zmm5
vmovdqu8 0(%rcx,%rax,1),%zmm6
vmovdqu8 64(%rcx,%rax,1),%zmm7
vmovdqu8 128(%rcx,%rax,1),%zmm10
vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z}
vbroadcastf64x2 0(%rdi),%zmm15
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm15,%zmm3,%zmm3
vpxorq %zmm15,%zmm4,%zmm4
vpxorq %zmm15,%zmm5,%zmm5
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,72,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %zmm11,%zmm5,%zmm5
vextracti32x4 $2,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %zmm29,%zmm11,%zmm11
vextracti32x4 $2,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (15 - 1),%r8
cmpq $16,%r8
jl .L_small_initial_partial_block_mbxlopCmuqdpqjz
subq $16,%r8
movl $0,(%rdx)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 16(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 80(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 144(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,37,72,68,228,1
.byte 98,179,37,72,68,236,16
.byte 98,179,37,72,68,196,17
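/* Editor's annotation: each arm carries two GHASH tails.  The full-block
 * tail clears the stored partial-block count with movl $0,(%rdx); the
 * _partial_block_ tail instead records the remaining byte count there and
 * saves the last counter block with vmovdqu64 %xmm12,16(%rsi), apparently
 * so a later call can resume mid-block.  This reading of the context
 * layout is inferred from the stores, not stated in the generated file. */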
.byte 98,179,37,72,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_mbxlopCmuqdpqjz .L_small_initial_partial_block_mbxlopCmuqdpqjz: movl %r8d,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 32(%r10),%zmm20 .byte 98,179,77,72,68,196,17 .byte 98,179,77,72,68,220,0 .byte 98,179,77,72,68,228,1 .byte 98,179,77,72,68,236,16 vmovdqu64 96(%r10),%zmm20 .byte 98,51,69,72,68,252,17 .byte 98,163,69,72,68,196,0 .byte 98,163,69,72,68,204,1 .byte 98,163,69,72,68,220,16 vmovdqu64 160(%r10),%zmm20 .byte 98,179,45,72,68,244,17 .byte 98,179,45,72,68,252,0 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 .byte 98,179,45,72,68,244,1 .byte 98,179,45,72,68,252,16 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 224(%r10),%ymm20 .byte 98,179,37,40,68,228,1 .byte 98,179,37,40,68,236,16 .byte 98,179,37,40,68,196,17 .byte 98,179,37,40,68,220,0 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 .byte 98,243,93,0,68,227,1 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 .byte 98,243,93,0,68,236,0 vpsrldq $4,%xmm5,%xmm5 .byte 98,115,93,0,68,244,16 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_mbxlopCmuqdpqjz: orq %r8,%r8 je .L_after_reduction_mbxlopCmuqdpqjz vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_mbxlopCmuqdpqjz: jmp .L_small_initial_blocks_encrypted_vkDeiBlhaznkthD .L_small_initial_num_blocks_is_16_vkDeiBlhaznkthD: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%rax,1),%zmm6 vmovdqu8 64(%rcx,%rax,1),%zmm7 vmovdqu8 128(%rcx,%rax,1),%zmm10 vmovdqu8 192(%rcx,%rax,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 98,210,85,72,220,239 vbroadcastf64x2 32(%rdi),%zmm15 .byte 98,210,125,72,220,199 .byte 98,210,101,72,220,223 .byte 98,210,93,72,220,231 .byte 
vbroadcastf64x2 16(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 32(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 48(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 64(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 80(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 96(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 112(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 128(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 144(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 160(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 176(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 192(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 208(%rdi),%zmm15
.byte 98,210,125,72,220,199
.byte 98,210,101,72,220,223
.byte 98,210,93,72,220,231
.byte 98,210,85,72,220,239
vbroadcastf64x2 224(%rdi),%zmm15
.byte 98,210,125,72,221,199
.byte 98,210,101,72,221,223
.byte 98,210,93,72,221,231
.byte 98,210,85,72,221,239
vpxorq %zmm6,%zmm0,%zmm0
vpxorq %zmm7,%zmm3,%zmm3
vpxorq %zmm10,%zmm4,%zmm4
vpxorq %zmm11,%zmm5,%zmm5
vextracti32x4 $3,%zmm5,%xmm12
movq %r9,%r10
vmovdqu8 %zmm0,0(%r10,%rax,1)
vmovdqu8 %zmm3,64(%r10,%rax,1)
vmovdqu8 %zmm4,128(%r10,%rax,1)
vmovdqu8 %zmm5,192(%r10,%rax,1){%k1}
vmovdqu8 %zmm5,%zmm5{%k1}{z}
vpshufb %zmm29,%zmm6,%zmm6
vpshufb %zmm29,%zmm7,%zmm7
vpshufb %zmm29,%zmm10,%zmm10
vpshufb %zmm29,%zmm11,%zmm11
vextracti32x4 $3,%zmm11,%xmm13
leaq 80(%rsi),%r10
subq $16 * (16 - 1),%r8
.L_small_initial_partial_block_fpGgFAenBuAyutw:
movl %r8d,(%rdx)
vmovdqu64 %xmm12,16(%rsi)
vpxorq %zmm14,%zmm6,%zmm6
vmovdqu64 16(%r10),%zmm20
.byte 98,179,77,72,68,196,17
.byte 98,179,77,72,68,220,0
.byte 98,179,77,72,68,228,1
.byte 98,179,77,72,68,236,16
vmovdqu64 80(%r10),%zmm20
.byte 98,51,69,72,68,252,17
.byte 98,163,69,72,68,196,0
.byte 98,163,69,72,68,204,1
.byte 98,163,69,72,68,220,16
vmovdqu64 144(%r10),%zmm20
.byte 98,179,45,72,68,244,17
.byte 98,179,45,72,68,252,0
vpternlogq $0x96,%zmm0,%zmm6,%zmm15
vpternlogq $0x96,%zmm3,%zmm7,%zmm16
.byte 98,179,45,72,68,244,1
.byte 98,179,45,72,68,252,16
vpternlogq $0x96,%zmm4,%zmm6,%zmm17
vpternlogq $0x96,%zmm5,%zmm7,%zmm19
vmovdqu64 208(%r10),%ymm20
vinserti64x2 $2,240(%r10),%zmm20,%zmm20
.byte 98,179,37,72,68,228,1
.byte 98,179,37,72,68,236,16
.byte 98,179,37,72,68,196,17
.byte 98,179,37,72,68,220,0
vpxorq %zmm17,%zmm4,%zmm4
vpxorq %zmm19,%zmm5,%zmm5
vpxorq %zmm15,%zmm0,%zmm0
vpxorq %zmm16,%zmm3,%zmm3
vpxorq %zmm5,%zmm4,%zmm4
vpsrldq $8,%zmm4,%zmm17
vpslldq $8,%zmm4,%zmm19
vpxorq %zmm17,%zmm0,%zmm0
vpxorq %zmm19,%zmm3,%zmm3
vextracti64x4 $1,%zmm0,%ymm17
vpxorq %ymm17,%ymm0,%ymm0
vextracti32x4 $1,%ymm0,%xmm17
vpxorq %xmm17,%xmm0,%xmm0
vextracti64x4 $1,%zmm3,%ymm19
vpxorq %ymm19,%ymm3,%ymm3
vextracti32x4 $1,%ymm3,%xmm19
vpxorq %xmm19,%xmm3,%xmm3
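// At this point the 512-bit accumulators have been XOR-folded down to the
// two 128-bit halves of the GHASH product, now in %xmm0 and %xmm3. The
// vpclmulqdq .byte ops below appear to reduce that product modulo the GHASH
// polynomial g(x) = x^128 + x^7 + x^2 + x + 1 via the pre-shifted constant
// in POLY2, leaving the updated hash state in %xmm14.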
vmovdqa64 POLY2(%rip),%xmm20
.byte 98,243,93,0,68,227,1
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm3,%xmm4
.byte 98,243,93,0,68,236,0
vpsrldq $4,%xmm5,%xmm5
.byte 98,115,93,0,68,244,16
vpslldq $4,%xmm14,%xmm14
vpternlogq $0x96,%xmm0,%xmm5,%xmm14
.L_small_initial_compute_done_fpGgFAenBuAyutw:
vpxorq %xmm13,%xmm14,%xmm14
.L_after_reduction_fpGgFAenBuAyutw:
.L_small_initial_blocks_encrypted_vkDeiBlhaznkthD:
.L_ghash_done_keEetjmxflGqBfv:
vmovdqu64 %xmm2,0(%rsi)
.L_enc_dec_done_keEetjmxflGqBfv:
vpshufb SHUF_MASK(%rip),%xmm14,%xmm14
vmovdqu64 %xmm14,64(%rsi)
.L_enc_dec_abort_keEetjmxflGqBfv:
jmp .Lexit_gcm_decrypt
.Lexit_gcm_decrypt:
cmpq $256,%r8
jbe .Lskip_hkeys_cleanup_byhoEGxnfawfFqd
vpxor %xmm0,%xmm0,%xmm0
vmovdqa64 %zmm0,0(%rsp)
vmovdqa64 %zmm0,64(%rsp)
vmovdqa64 %zmm0,128(%rsp)
vmovdqa64 %zmm0,192(%rsp)
vmovdqa64 %zmm0,256(%rsp)
vmovdqa64 %zmm0,320(%rsp)
vmovdqa64 %zmm0,384(%rsp)
vmovdqa64 %zmm0,448(%rsp)
vmovdqa64 %zmm0,512(%rsp)
vmovdqa64 %zmm0,576(%rsp)
vmovdqa64 %zmm0,640(%rsp)
vmovdqa64 %zmm0,704(%rsp)
.Lskip_hkeys_cleanup_byhoEGxnfawfFqd:
vzeroupper
leaq (%rbp),%rsp
.cfi_def_cfa_register %rsp
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
.byte 0xf3,0xc3
.Ldecrypt_seh_end:
.cfi_endproc
.size aes_gcm_decrypt_avx512, .-aes_gcm_decrypt_avx512
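// Read-only constant pool for the routines above: GHASH reduction constants
// (POLY, POLY2), the byte-reflection shuffle (SHUF_MASK), counter-increment
// vectors in little- and big-endian lane order (ddq_add_* / ddq_addbe_*),
// and length-to-mask lookup tables used to build load/store masks for
// partial final blocks.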
.section .rodata
.align 16
POLY:.quad 0x0000000000000001, 0xC200000000000000
.align 64
POLY2:
.quad 0x00000001C2000000, 0xC200000000000000
.quad 0x00000001C2000000, 0xC200000000000000
.quad 0x00000001C2000000, 0xC200000000000000
.quad 0x00000001C2000000, 0xC200000000000000
.align 16
TWOONE:.quad 0x0000000000000001, 0x0000000100000000
.align 64
SHUF_MASK:
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.align 16
SHIFT_MASK: .quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
ALL_F: .quad 0xffffffffffffffff, 0xffffffffffffffff
ZERO: .quad 0x0000000000000000, 0x0000000000000000
.align 16
ONEa: .quad 0x0000000000000001, 0x0000000000000000
.align 16
ONEf: .quad 0x0000000000000000, 0x0100000000000000
.align 64
ddq_add_1234:
.quad 0x0000000000000001, 0x0000000000000000
.quad 0x0000000000000002, 0x0000000000000000
.quad 0x0000000000000003, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.align 64
ddq_add_5678:
.quad 0x0000000000000005, 0x0000000000000000
.quad 0x0000000000000006, 0x0000000000000000
.quad 0x0000000000000007, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.align 64
ddq_add_4444:
.quad 0x0000000000000004, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.align 64
ddq_add_8888:
.quad 0x0000000000000008, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.align 64
ddq_addbe_1234:
.quad 0x0000000000000000, 0x0100000000000000
.quad 0x0000000000000000, 0x0200000000000000
.quad 0x0000000000000000, 0x0300000000000000
.quad 0x0000000000000000, 0x0400000000000000
.align 64
ddq_addbe_4444:
.quad 0x0000000000000000, 0x0400000000000000
.quad 0x0000000000000000, 0x0400000000000000
.quad 0x0000000000000000, 0x0400000000000000
.quad 0x0000000000000000, 0x0400000000000000
.align 64
byte_len_to_mask_table:
.value 0x0000, 0x0001, 0x0003, 0x0007
.value 0x000f, 0x001f, 0x003f, 0x007f
.value 0x00ff, 0x01ff, 0x03ff, 0x07ff
.value 0x0fff, 0x1fff, 0x3fff, 0x7fff
.value 0xffff
.align 64
byte64_len_to_mask_table:
.quad 0x0000000000000000, 0x0000000000000001
.quad 0x0000000000000003, 0x0000000000000007
.quad 0x000000000000000f, 0x000000000000001f
.quad 0x000000000000003f, 0x000000000000007f
.quad 0x00000000000000ff, 0x00000000000001ff
.quad 0x00000000000003ff, 0x00000000000007ff
.quad 0x0000000000000fff, 0x0000000000001fff
.quad 0x0000000000003fff, 0x0000000000007fff
.quad 0x000000000000ffff, 0x000000000001ffff
.quad 0x000000000003ffff, 0x000000000007ffff
.quad 0x00000000000fffff, 0x00000000001fffff
.quad 0x00000000003fffff, 0x00000000007fffff
.quad 0x0000000000ffffff, 0x0000000001ffffff
.quad 0x0000000003ffffff, 0x0000000007ffffff
.quad 0x000000000fffffff, 0x000000001fffffff
.quad 0x000000003fffffff, 0x000000007fffffff
.quad 0x00000000ffffffff, 0x00000001ffffffff
.quad 0x00000003ffffffff, 0x00000007ffffffff
.quad 0x0000000fffffffff, 0x0000001fffffffff
.quad 0x0000003fffffffff, 0x0000007fffffffff
.quad 0x000000ffffffffff, 0x000001ffffffffff
.quad 0x000003ffffffffff, 0x000007ffffffffff
.quad 0x00000fffffffffff, 0x00001fffffffffff
.quad 0x00003fffffffffff, 0x00007fffffffffff
.quad 0x0000ffffffffffff, 0x0001ffffffffffff
.quad 0x0003ffffffffffff, 0x0007ffffffffffff
.quad 0x000fffffffffffff, 0x001fffffffffffff
.quad 0x003fffffffffffff, 0x007fffffffffffff
.quad 0x00ffffffffffffff, 0x01ffffffffffffff
.quad 0x03ffffffffffffff, 0x07ffffffffffffff
.quad 0x0fffffffffffffff, 0x1fffffffffffffff
.quad 0x3fffffffffffffff, 0x7fffffffffffffff
.quad 0xffffffffffffffff
.text
#endif
#endif