######################################################################## # Copyright(c) 2019 Arm Corporation All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # * Neither the name of Arm Corporation nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#########################################################################

/*
 * uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len)
 *
 * CRC-32 over buf[0 .. len) built on the AArch64 CRC32{B,H,W,X} and PMULL
 * instructions (GNU as syntax, requires the +crc and +crypto extensions).
 *
 * ABI:   AAPCS64 leaf routine, makes no calls and does not touch the stack.
 * In:    w0 = seed, x1 = buf, x2 = len in bytes
 * Out:   w0 = result. The seed is bit-inverted on entry and the state is
 *        bit-inverted again on exit, so a previous return value can be fed
 *        back as the seed to continue over more data, and len == 0 returns
 *        the seed unchanged.
 * Clobb: x1-x10, x15, v1, v3, v4, v5, condition flags
 *
 * Structure:
 *   1. While at least BLOCK_BYTES remain, consume one 1024-byte block:
 *        - three lanes of LANE_BYTES each (block offsets 0, 336 and 672)
 *          are run through CRC32X side by side; lane 0 continues the
 *          running state, lanes 1 and 2 start from zero;
 *        - lane 2 also absorbs the 8 bytes at block offset 1008;
 *        - lanes 0 and 1 are each multiplied (PMULL) by a fixed constant,
 *          reduced with CRC32X against a zero state, and XORed with lane 2;
 *        - the last 8 bytes of the block (offset 1016) are absorbed into
 *          the merged state.
 *   2. The remaining len % 1024 bytes are consumed 8 at a time, followed
 *      by at most one 4-byte, one 2-byte and one 1-byte step.
 */

	.arch	armv8-a+crc+crypto
	.text
	.align	3
	.global	crc32_gzip_refl_hw_fold
	.type	crc32_gzip_refl_hw_fold, %function

	.equ	BLOCK_BYTES, 1024		// bytes consumed per block-loop pass
	.equ	LANE_BYTES, 336			// bytes per lane inside one block

/* Arguments and result */
w_acc		.req	w0	// seed on entry, running state (also lane 0), result
x_blk		.req	x1	// buf on entry; start of the current 1024-byte block
x_left		.req	x2	// len on entry; reduced to the leftover byte count

/*
 * x10 plays two roles. In the block path it is the address just past the
 * last whole block, which is the block-loop bound. When that loop exits it
 * is therefore also the first unprocessed byte, so the tail code keeps
 * using the same register as its read cursor.
 */
x_tail		.req	x10

/* Scratch */
x_scratch	.req	x15
w_scratch	.req	w15

/* Per-lane multipliers used when merging the lanes */
d_k0		.req	d3
v_k0		.req	v3
d_k1		.req	d1
v_k1		.req	v1

/* Block-loop working set */
x_lane		.req	x3	// read pointer for lane 0; lanes 1 and 2 use offsets
x_lane_end	.req	x9	// x_blk + LANE_BYTES, bound for the lane loop
x_word0		.req	x8	// 8 data bytes for lane 0
x_word1		.req	x7	// 8 data bytes for lane 1
x_word2		.req	x6	// 8 data bytes for lane 2
w_acc1		.req	w4	// lane 1 state
w_acc2		.req	w5	// lane 2 state
d_acc0		.req	d4
v_acc0		.req	v4
d_acc1		.req	d5
v_acc1		.req	v5

/* Tail working set (x3 is free again once the block loop is finished) */
x_tail8_end	.req	x3	// address at which the 8-byte tail loop stops

crc32_gzip_refl_hw_fold:
	mvn	w_acc, w_acc				// state = ~seed
	cmp	x_left, #(BLOCK_BYTES - 1)
	mov	x_tail, x_blk				// short input: tail starts at buf (flags kept)
	b.ls	.Ltail					// len < 1024: no whole block to process

	/* x_tail = buf + (len rounded down to a multiple of BLOCK_BYTES) */
	sub	x_tail, x_left, #BLOCK_BYTES
	and	x_tail, x_tail, #-BLOCK_BYTES
	add	x_tail, x_tail, #BLOCK_BYTES
	add	x_tail, x_blk, x_tail

	/*
	 * Load the two 32-bit lane multipliers 0xb486819b and 0x76278617.
	 * NOTE(review): presumably these are the shift factors that advance
	 * lane 0 and lane 1 to the end of lane 2 for this 336/336/344 byte
	 * split; the values are carried over verbatim and are not derived
	 * here - confirm against the constant generator before changing the
	 * block geometry.
	 */
	mov	x_scratch, #0x819b
	movk	x_scratch, #0xb486, lsl #16
	fmov	d_k0, x_scratch

	mov	x_scratch, #0x8617
	movk	x_scratch, #0x7627, lsl #16
	fmov	d_k1, x_scratch

	.p2align 3
.Lblock:
	add	x_lane_end, x_blk, #LANE_BYTES
	mov	x_lane, x_blk
	mov	w_acc1, #0				// lanes 1 and 2 restart from zero
	mov	w_acc2, #0				// for every block

	/* 42 passes, each feeding 8 bytes to every lane. */
	.p2align 3
.Llanes:
	ldr	x_word0, [x_lane]
	ldr	x_word1, [x_lane, #LANE_BYTES]
	ldr	x_word2, [x_lane, #(2 * LANE_BYTES)]

	add	x_lane, x_lane, #8
	cmp	x_lane, x_lane_end			// flags are consumed by b.ne below

	crc32x	w_acc, w_acc, x_word0
	crc32x	w_acc1, w_acc1, x_word1
	crc32x	w_acc2, w_acc2, x_word2
	b.ne	.Llanes

	/* Carry-less multiply of the lane 0 state by its constant. */
	uxtw	x_scratch, w_acc
	fmov	d_acc0, x_scratch
	pmull	v_acc0.1q, v_acc0.1d, v_k0.1d

	/* Same for lane 1. */
	uxtw	x_scratch, w_acc1
	fmov	d_acc1, x_scratch
	pmull	v_acc1.1q, v_acc1.1d, v_k1.1d

	/* Lane 2 takes the 8 bytes that follow the three lanes (offset 1008). */
	ldr	x_scratch, [x_blk, #(3 * LANE_BYTES)]
	crc32x	w_acc2, w_acc2, x_scratch

	/* Reduce the low 64 bits of each product to 32 bits via CRC32X from zero. */
	fmov	x_scratch, d_acc0
	crc32x	w_acc, wzr, x_scratch

	fmov	x_scratch, d_acc1
	crc32x	w_acc1, wzr, x_scratch

	/* Merge the three lanes into the running state. */
	eor	w_acc, w_acc, w_acc1
	eor	w_acc, w_acc, w_acc2

	/* Final 8 bytes of the block (offset 1016) go into the merged state. */
	ldr	x_scratch, [x_blk, #(BLOCK_BYTES - 8)]
	crc32x	w_acc, w_acc, x_scratch

	add	x_blk, x_blk, #BLOCK_BYTES
	cmp	x_tail, x_blk
	b.ne	.Lblock

	and	x_left, x_left, #(BLOCK_BYTES - 1)	// bytes left after the whole blocks

.Ltail:
	cmp	x_left, #7
	b.ls	.Ltail4					// fewer than 8 bytes left

	/* x_tail8_end = x_tail + (x_left rounded down to a multiple of 8) */
	sub	x_tail8_end, x_left, #8
	and	x_tail8_end, x_tail8_end, #-8
	add	x_tail8_end, x_tail8_end, #8
	add	x_tail8_end, x_tail, x_tail8_end

	.p2align 3
.Ltail8:
	ldr	x_scratch, [x_tail], #8			// post-increment the cursor
	crc32x	w_acc, w_acc, x_scratch

	cmp	x_tail, x_tail8_end
	b.ne	.Ltail8
	and	x_left, x_left, #7			// 0..7 bytes left

.Ltail4:
	cmp	x_left, #3
	b.ls	.Ltail2
	ldr	w_scratch, [x_tail], #4
	crc32w	w_acc, w_acc, w_scratch
	sub	x_left, x_left, #4

.Ltail2:
	cmp	x_left, #1
	b.ls	.Ltail1
	ldrh	w_scratch, [x_tail], #2
	crc32h	w_acc, w_acc, w_scratch
	sub	x_left, x_left, #2

.Ltail1:
	cbz	x_left, .Ldone
	ldrb	w_scratch, [x_tail]
	crc32b	w_acc, w_acc, w_scratch

.Ldone:
	mvn	w_acc, w_acc				// result = ~state
	ret

	.size	crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold