C x86/camellia-crypt-internal.asm

ifelse(<
   Copyright (C) 2010, Niels Möller

   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
>)

C Syntax and target: AT&T operand order, 32-bit x86, preprocessed by m4.
C All arguments are passed on the stack. The helper macros PROLOGUE,
C EPILOGUE, ALIGN, LREG and HREG are not defined in this file.

C Register usage:

C Camellia state, 128-bit value in little endian order.
C L0, H0 corresponds to D1 in the spec and i0 in the C implementation.
C while L1, H1 corresponds to D2/i1.
define(<L0>,<%eax>)
define(<H0>,<%ebx>)
define(<L1>,<%ecx>)
define(<H1>,<%edx>)

C TMP is scratch (table index, key word, pointer), KEY points into the
C subkey array and T holds the base address of the lookup tables.
define(<TMP>,<%ebp>)
define(<KEY>,<%esi>)
define(<T>,<%edi>)

C Locals on the stack
C The 20 bytes reserved by the function: spill slots for the four state
C words, plus the remaining-subkeys counter used by the round loop.
define(<FRAME_L0>,	<(%esp)>)
define(<FRAME_H0>,	<4(%esp)>)
define(<FRAME_L1>,	<8(%esp)>)
define(<FRAME_H1>,	<12(%esp)>)
define(<FRAME_CNT>,	<16(%esp)>)

C Arguments on stack.
C Offsets are valid once the four registers are pushed and the locals
C are allocated: 20 bytes of locals, 16 bytes of saved registers and
C the return address at 36(%esp) precede the first argument.
define(<FRAME_NKEYS>,	<40(%esp)>)
define(<FRAME_KEYS>,	<44(%esp)>)
define(<FRAME_TABLE>,	<48(%esp)>)
define(<FRAME_LENGTH>,	<52(%esp)>)
define(<FRAME_DST>,	<56(%esp)>)
define(<FRAME_SRC>,	<60(%esp)>)

C Lookup table accessors. The argument is a register holding a byte
C index; each table is 1024 bytes (256 entries of 4 bytes) from T.
C NOTE(review): the name-to-offset mapping is assumed to follow the
C field order of struct camellia_table - confirm against the C header.
define(<SP1110>, <(T,$1,4)>)
define(<SP0222>, <1024(T,$1,4)>)
define(<SP3033>, <2048(T,$1,4)>)
define(<SP4404>, <3072(T,$1,4)>)

C ROUND(xl, xh, yl, yh, key-offset)
C xl and xh are rotated 16 bits at the end
C yl and yh are read from stack, and left in registers
C
C yl and yh must be passed as quoted names, since the macro builds the
C stack slot name by pasting FRAME_ in front of the argument. The
C registers given as yl and yh are overwritten with the round function
C output before the saved stack copies are xored in, so the caller has
C to store them to their FRAME slots first. Clobbers TMP and the flags.
define(<ROUND>, <
	movzbl	LREG($1), TMP
	movl	SP1110(TMP), $4
	movzbl	HREG($1), TMP
	xorl	SP4404(TMP), $4
	roll	<$>16, $1

	movzbl	LREG($2), TMP
	movl	SP4404(TMP), $3
	movzbl	HREG($2), TMP
	xorl	SP3033(TMP), $3
	roll	<$>16, $2

	movzbl	LREG($1), TMP
	xorl	SP3033(TMP), $4
	movzbl	HREG($1), TMP
	xorl	SP0222(TMP), $4

	movzbl	LREG($2), TMP
	xorl	SP0222(TMP), $3
	movzbl	HREG($2), TMP
	xorl	SP1110(TMP), $3

	xorl	$5(KEY), $4
	xorl	$5 + 4(KEY), $3
	xorl	$3, $4
	rorl	<$>8, $3
	xorl	$4, $3
	xorl	FRAME_$3, $3
	xorl	FRAME_$4, $4
>)

C Six rounds, with inputs and outputs in registers.
C Uses the six subkeys at byte offsets 0 through 40 from KEY, without
C advancing KEY. Before each round the half that is about to be
C overwritten is saved to its FRAME slot. The last round needs no save:
C the slots written before the fourth round are still current, since
C the fifth round only rotates L0 and H0. Every ROUND leaves its xl and
C xh inputs rotated by 16 bits; for L0 and H0 the next round with the
C same inputs rotates them the rest of the way, while L1 and H1 need
C the explicit rotates at the end.
define(<ROUND6>, <
	movl	L0, FRAME_L0
	movl	H0, FRAME_H0
	movl	L1, FRAME_L1
	movl	H1, FRAME_H1
	ROUND(L0,H0,<L1>,<H1>,0)
	movl	L1, FRAME_L1
	movl	H1, FRAME_H1
	ROUND(L1,H1,<L0>,<H0>,8)
	movl	L0, FRAME_L0
	movl	H0, FRAME_H0
	ROUND(L0,H0,<L1>,<H1>,16)
	movl	L1, FRAME_L1
	movl	H1, FRAME_H1
	ROUND(L1,H1,<L0>,<H0>,24)
	movl	L0, FRAME_L0
	movl	H0, FRAME_H0
	ROUND(L0,H0,<L1>,<H1>,32)
	ROUND(L1,H1,<L0>,<H0>,40)
	roll	<$>16, L1
	roll	<$>16, H1
>)

C FL(x0, x1, key-offset)
C Computes x0 ^= rotl((key word at offset + 4) & x1, 1), followed by
C x1 ^= (key word at offset) | x0. Clobbers TMP and the flags.
define(<FL>, <
	movl	$3 + 4(KEY), TMP
	andl	$2, TMP
	roll	<$>1, TMP
	xorl	TMP, $1
	movl	$3(KEY), TMP
	orl	$1, TMP
	xorl	TMP, $2
>)

C FLINV(x0, x1, key-offset)
C The same two steps as FL, applied in the opposite order.
C Clobbers TMP and the flags.
define(<FLINV>, <
	movl	$3(KEY), TMP
	orl	$1, TMP
	xorl	TMP, $2
	movl	$3 + 4(KEY), TMP
	andl	$2, TMP
	roll	<$>1, TMP
	xorl	TMP, $1
>)

	.file "camellia-crypt-internal.asm"

	C _camellia_crypt(unsigned nkeys, const uint64_t *keys,
	C	          const struct camellia_table *T,
	C	          size_t length, uint8_t *dst,
	C	          uint8_t *src)
	C
	C Processes the input in 16-byte blocks, reading from src and
	C writing to dst. Returns immediately when length is zero.
	C Otherwise blocks are processed until the remaining length
	C reaches zero or wraps below zero, so length is presumably
	C required to be a multiple of 16 - verify against the callers.
	C
	C Per block: one ROUND6, then one FL, FLINV, ROUND6 group for
	C every 8 subkeys in nkeys beyond the first 8, for example two
	C groups when nkeys is 24.
	C
	C %ebx, %ebp, %esi and %edi are saved and restored. %eax, %ecx
	C and %edx are overwritten and not restored. The length, dst and
	C src argument slots on the stack are modified in place.
	.text
	ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
	C save all registers that need to be saved
	pushl	%ebx		C  32(%esp)
	pushl	%ebp		C  28(%esp)
	pushl	%esi		C  24(%esp)
	pushl	%edi		C  20(%esp)

	subl	$20, %esp

	C Nothing to do for a zero length.
	movl	FRAME_LENGTH, %ebp
	testl	%ebp,%ebp
	jz	.Lend

.Lblock_loop:
	C Load data, note that we'll happily do unaligned loads
	movl	FRAME_SRC, TMP
	movl	(TMP), H0
	bswap	H0
	movl	4(TMP), L0
	bswap	L0
	movl	8(TMP), H1
	bswap	H1
	movl	12(TMP), L1
	bswap	L1
	addl	$16, FRAME_SRC

	C Restart from the first subkey for every block. The counter
	C starts at nkeys - 8 and drops by 8 per round loop iteration.
	movl	FRAME_KEYS, KEY
	movl	FRAME_NKEYS, TMP
	subl	$8, TMP
	movl	TMP, FRAME_CNT

	C Xor the first subkey into L0 and H0, then step past it.
	xorl	(KEY), L0
	xorl	4(KEY), H0
	addl	$8, KEY

	movl	FRAME_TABLE, T

	ROUND6
.Lround_loop:
	C Step KEY over the six round subkeys just used and the two
	C subkeys consumed below through negative offsets.
	addl	$64, KEY
	FL(L0, H0, -16)
	FLINV(L1, H1, -8)
	ROUND6
	subl	$8, FRAME_CNT
	ja	.Lround_loop

	C Store the result. L0 and H0 go to the second half of the
	C output block unmodified; L1 and H1 are xored with the subkey
	C at offset 48 from KEY and go to the first half.
	movl	FRAME_DST, TMP
	bswap	H0
	movl	H0,8(TMP)
	bswap	L0
	movl	L0,12(TMP)
	xorl	52(KEY), H1
	bswap	H1
	movl	H1, 0(TMP)
	xorl	48(KEY), L1
	bswap	L1
	movl	L1, 4(TMP)
	addl	$16, FRAME_DST
	subl	$16, FRAME_LENGTH

	C Unsigned test: continue only while the remaining length is
	C above zero and the subtraction did not borrow.
	ja	.Lblock_loop

.Lend:
	addl	$20, %esp
	popl	%edi
	popl	%esi
	popl	%ebp
	popl	%ebx
	ret
EPILOGUE(_nettle_camellia_crypt)