// This file is generated from a similarly-named Perl script in the BoringSSL // source tree. Do not edit by hand. #include #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) #include #if __ARM_MAX_ARCH__>=7 .text .arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-) .fpu neon .code 32 #undef __thumb2__ .align 5 .Lrcon: .long 0x01,0x01,0x01,0x01 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat .long 0x1b,0x1b,0x1b,0x1b .text .globl aes_hw_set_encrypt_key .hidden aes_hw_set_encrypt_key .type aes_hw_set_encrypt_key,%function .align 5 aes_hw_set_encrypt_key: .Lenc_key: mov r3,#-1 cmp r0,#0 beq .Lenc_key_abort cmp r2,#0 beq .Lenc_key_abort mov r3,#-2 cmp r1,#128 blt .Lenc_key_abort cmp r1,#256 bgt .Lenc_key_abort tst r1,#0x3f bne .Lenc_key_abort adr r3,.Lrcon cmp r1,#192 veor q0,q0,q0 vld1.8 {q3},[r0]! mov r1,#8 @ reuse r1 vld1.32 {q1,q2},[r3]! blt .Loop128 @ 192-bit key support was removed. b .L256 .align 4 .Loop128: vtbl.8 d20,{q3},d4 vtbl.8 d21,{q3},d5 vext.8 q9,q0,q3,#12 vst1.32 {q3},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 subs r1,r1,#1 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 vshl.u8 q1,q1,#1 veor q3,q3,q10 bne .Loop128 vld1.32 {q1},[r3] vtbl.8 d20,{q3},d4 vtbl.8 d21,{q3},d5 vext.8 q9,q0,q3,#12 vst1.32 {q3},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 vshl.u8 q1,q1,#1 veor q3,q3,q10 vtbl.8 d20,{q3},d4 vtbl.8 d21,{q3},d5 vext.8 q9,q0,q3,#12 vst1.32 {q3},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 veor q3,q3,q10 vst1.32 {q3},[r2] add r2,r2,#0x50 mov r12,#10 b .Ldone @ 192-bit key support was removed. .align 4 .L256: vld1.8 {q8},[r0] mov r1,#7 mov r12,#14 vst1.32 {q3},[r2]! .Loop256: vtbl.8 d20,{q8},d4 vtbl.8 d21,{q8},d5 vext.8 q9,q0,q3,#12 vst1.32 {q8},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 subs r1,r1,#1 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 vshl.u8 q1,q1,#1 veor q3,q3,q10 vst1.32 {q3},[r2]! beq .Ldone vdup.32 q10,d7[1] vext.8 q9,q0,q8,#12 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 veor q8,q8,q9 vext.8 q9,q0,q9,#12 veor q8,q8,q9 vext.8 q9,q0,q9,#12 veor q8,q8,q9 veor q8,q8,q10 b .Loop256 .Ldone: str r12,[r2] mov r3,#0 .Lenc_key_abort: mov r0,r3 @ return value bx lr .size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key .globl aes_hw_encrypt .hidden aes_hw_encrypt .type aes_hw_encrypt,%function .align 5 aes_hw_encrypt: AARCH64_VALID_CALL_TARGET ldr r3,[r2,#240] vld1.32 {q0},[r2]! vld1.8 {q2},[r0] sub r3,r3,#2 vld1.32 {q1},[r2]! .Loop_enc: .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 vld1.32 {q1},[r2]! bgt .Loop_enc .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 vld1.32 {q0},[r2] .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 veor q2,q2,q0 vst1.8 {q2},[r1] bx lr .size aes_hw_encrypt,.-aes_hw_encrypt .globl aes_hw_ctr32_encrypt_blocks .hidden aes_hw_ctr32_encrypt_blocks .type aes_hw_ctr32_encrypt_blocks,%function .align 5 aes_hw_ctr32_encrypt_blocks: mov ip,sp stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ldr r4, [ip] @ load remaining arg ldr r5,[r3,#240] ldr r8, [r4, #12] vld1.32 {q0},[r4] vld1.32 {q8,q9},[r3] @ load key schedule... sub r5,r5,#4 mov r12,#16 cmp r2,#2 add r7,r3,r5,lsl#4 @ pointer to last 5 round keys sub r5,r5,#2 vld1.32 {q12,q13},[r7]! vld1.32 {q14,q15},[r7]! vld1.32 {q7},[r7] add r7,r3,#32 mov r6,r5 movlo r12,#0 @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are @ affected by silicon errata #1742098 [0] and #1655431 [1], @ respectively, where the second instruction of an aese/aesmc @ instruction pair may execute twice if an interrupt is taken right @ after the first instruction consumes an input register of which a @ single 32-bit lane has been updated the last time it was modified. @ @ This function uses a counter in one 32-bit lane. The @ could write to q1 and q10 directly, but that trips this bugs. @ We write to q6 and copy to the final register as a workaround. @ @ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice @ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice #ifndef __ARMEB__ rev r8, r8 #endif add r10, r8, #1 vorr q6,q0,q0 rev r10, r10 vmov.32 d13[1],r10 add r8, r8, #2 vorr q1,q6,q6 bls .Lctr32_tail rev r12, r8 vmov.32 d13[1],r12 sub r2,r2,#3 @ bias vorr q10,q6,q6 b .Loop3x_ctr32 .align 4 .Loop3x_ctr32: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.32 {q9},[r7]! bgt .Loop3x_ctr32 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 vld1.8 {q2},[r0]! add r9,r8,#1 .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.8 {q3},[r0]! rev r9,r9 .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 vld1.8 {q11},[r0]! mov r7,r3 .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 veor q2,q2,q7 add r10,r8,#2 .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 veor q3,q3,q7 add r8,r8,#3 .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 @ Note the logic to update q0, q1, and q1 is written to work @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in @ 32-bit mode. See the comment above. veor q11,q11,q7 vmov.32 d13[1], r9 .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vorr q0,q6,q6 rev r10,r10 .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 vmov.32 d13[1], r10 rev r12,r8 .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 vorr q1,q6,q6 vmov.32 d13[1], r12 .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vorr q10,q6,q6 subs r2,r2,#3 .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 veor q2,q2,q4 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] vst1.8 {q2},[r1]! veor q3,q3,q5 mov r6,r5 vst1.8 {q3},[r1]! veor q11,q11,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] vst1.8 {q11},[r1]! bhs .Loop3x_ctr32 adds r2,r2,#3 beq .Lctr32_done cmp r2,#1 mov r12,#16 moveq r12,#0 .Lctr32_tail: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.32 {q9},[r7]! bgt .Lctr32_tail .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.8 {q2},[r0],r12 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.8 {q3},[r0] .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 veor q2,q2,q7 .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 veor q3,q3,q7 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 cmp r2,#1 veor q2,q2,q0 veor q3,q3,q1 vst1.8 {q2},[r1]! beq .Lctr32_done vst1.8 {q3},[r1] .Lctr32_done: vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} .size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks #endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)