/*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* 1. Redistributions of source code must retain the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer. */ /* */ /* 2. Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials */ /* provided with the distribution. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* */ /* The views and conclusions contained in the software and */ /* documentation are those of the authors and should not be */ /* interpreted as representing official policies, either expressed */ /* or implied, of The University of Texas at Austin. 
*/ /*********************************************************************/

/*
 * Real-arithmetic matrix-multiply micro-kernel that accumulates each dot
 * product into two adjacent slots of C:
 *
 *     C[2*i + 0] += ALPHA_R * sum_k a(i,k) * b(k,j)
 *     C[2*i + 1] += ALPHA_I * sum_k a(i,k) * b(k,j)
 *
 * (see the write-back blocks .L18, .L28, .L38, ...).  The loop nest walks
 * N in column blocks of 8, 4, 2, 1 (J), M in row blocks of 2, 1 (I) and
 * K unrolled by 4 with a K & 3 tail (L).
 *
 * NOTE(review): MADD, MOV, MTC, ADD, LD, ST, SDARG, LDARG, SIZE,
 * ZBASE_SHIFT, PROLOGUE and EPILOGUE come from common.h and are not visible
 * here.  The comments below assume "MADD d, c, x, y" computes d = c + x * y,
 * that "MTC $0, r" clears r, and that SIZE is the element size in bytes --
 * confirm against common.h.
 *
 * NOTE(review): the instruction written directly after every branch is
 * treated as its MIPS branch delay slot (it does useful work on both the
 * taken and not-taken path).  This relies on the assembler being in
 * noreorder mode, presumably set up by PROLOGUE/common.h -- confirm.
 */

#define ASSEMBLER
#include "common.h"

/* Integer arguments as received in registers. */
#define M $4
#define N $5
#define K $6
#define A $9
#define B $10
#define C $11
/* LDC is not received in $8; it is loaded from the stack in the prologue. */
#define LDC $8

/* Running pointers into the A and B panels. */
#define AO $12
#define BO $13

/* Loop counters: I over row blocks, J over column blocks, L over K. */
#define I $2
#define J $3
#define L $7

/* One C pointer per column of the current column block. */
#define CO1 $14
#define CO2 $15
#define CO3 $16
#define CO4 $17
#define CO5 $18
#define CO6 $19
#define CO7 $20
#define CO8 $21

/* Defined only for TRMMKERNEL builds; not referenced anywhere below. */
#if defined(TRMMKERNEL)
#define OFFSET $22
#define KK $23
#define TEMP $24
#endif

/* Values loaded from the A panel. */
#define a1 $f0
#define a2 $f1
#define a3 $f28
#define a4 $f29

/* Values loaded from the B panel. */
#define b1 $f2
#define b2 $f3
#define b3 $f4
#define b4 $f5
#define b5 $f6
#define b6 $f7
#define b7 $f8
#define b8 $f9

/* a5 shares $f9 with b8; b8 itself is never used under that name. */
#define a5 b8

/*
 * Accumulators cXY: X = column within the block (1..8), Y = row within
 * the block (1..2).  $f15/$f16 are skipped because they hold alpha.
 */
#define c11 $f10
#define c12 $f11
#define c21 $f12
#define c22 $f13
#define c31 $f14
#define c32 $f17
#define c41 $f18
#define c42 $f19
#define c51 $f20
#define c52 $f21
#define c61 $f22
#define c62 $f23
#define c71 $f24
#define c72 $f25
#define c81 $f26
#define c82 $f27

/* Scale factors; only read in this routine, never written. */
#define ALPHA_R $f15
#define ALPHA_I $f16

	PROLOGUE

	/*
	 * 128-byte frame.  $16-$21 (CO3..CO8) and $f24-$f29 (c71..c82, a3, a4)
	 * are saved here and restored at .L999.
	 */
	daddiu $sp, $sp, -128
	SDARG $16, 0($sp)
	SDARG $17, 8($sp)
	SDARG $18, 16($sp)
	SDARG $19, 24($sp)
	SDARG $20, 32($sp)
	SDARG $21, 40($sp)
	sdc1 $f24, 48($sp)
	sdc1 $f25, 56($sp)
	sdc1 $f26, 64($sp)
	sdc1 $f27, 72($sp)
	sdc1 $f28, 80($sp)
	sdc1 $f29, 88($sp)

	/* 128($sp) is offset 0 from the stack pointer on entry. */
	LDARG LDC, 128($sp)
	/* Convert LDC from elements to a byte stride between columns of C. */
	dsll LDC, LDC, ZBASE_SHIFT

	/* ---------------- column blocks of 8 (J = N >> 3) ---------------- */
	dsra J, N, 3
	blez J, .L30
	nop

.L10:
	/* Set up CO1..CO8 for this block, advance C past it, restart A. */
	move CO1, C
	MTC $0, c11
	daddu CO2, C, LDC
	move AO, A
	daddu CO3, CO2, LDC
	daddiu J, J, -1
	daddu CO4, CO3, LDC
	MOV c21, c11
	daddu CO5, CO4, LDC
	MOV c31, c11
	daddu CO6, CO5, LDC
	MOV c41, c11
	daddu CO7, CO6, LDC
	MOV c51, c11
	daddu CO8, CO7, LDC
	dsra I, M, 1
	daddu C, CO8, LDC
	blez I, .L20
	MOV c61, c11

.L11:
	/*
	 * 2 x 8 tile: clear the remaining accumulators, preload the first
	 * A/B values and compute L = K >> 2.
	 */
	LD a1, 0 * SIZE(AO)
	MOV c71, c11
	LD b1, 0 * SIZE(B)
	MOV c81, c11
	LD a3, 4 * SIZE(AO)
	MOV c12, c11
	LD b2, 1 * SIZE(B)
	MOV c22, c11
	dsra L, K, 2
	MOV c32, c11
	LD b3, 2 * SIZE(B)
	MOV c42, c11
	LD b4, 3 * SIZE(B)
	MOV c52, c11
	LD b5, 4 * SIZE(B)
	MOV c62, c11
	LD b6, 8 * SIZE(B)
	MOV c72, c11
	LD b7, 12 * SIZE(B)
	MOV c82, c11
	blez L, .L15
	move BO, B			/* delay slot: BO restarts at B for every row block */

	/*
	 * Pipeline prologue: row 1 x columns 1..4 of the first k step is
	 * issued here; .L12/.L13 begin with row 2 of that step.
	 */
	MADD c11, c11, a1, b1
	LD a2, 1 * SIZE(AO)
	MADD c21, c21, a1, b2
	daddiu L, L, -1
	MADD c31, c31, a1, b3
	blez L, .L13			/* only one unrolled pass: go straight to the drain copy */
	MADD c41, c41, a1, b4
	NOP
	.align 3

.L12:
	/*
	 * Steady-state body: four k steps per pass (AO += 8, BO += 32).  The
	 * bottom of the body already issues row 1 x columns 1..4 of the next
	 * pass.
	 */
	MADD c12, c12, a2, b1
	LD b1, 16 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 5 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 6 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 7 * SIZE(BO)
	MADD c51, c51, a1, b5
	LD a4, 2 * SIZE(AO)
	MADD c61, c61, a1, b2
	NOP
	MADD c71, c71, a1, b3
	NOP
	MADD c81, c81, a1, b4
	LD a1, 8 * SIZE(AO)
	MADD c52, c52, a2, b5
	LD b5, 20 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 9 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 10 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 11 * SIZE(BO)
	/* second k step: A values in a4 (row 1) and a2 (row 2) */
	MADD c11, c11, a4, b6
	LD a2, 3 * SIZE(AO)
	MADD c21, c21, a4, b2
	NOP
	MADD c31, c31, a4, b3
	NOP
	MADD c41, c41, a4, b4
	NOP
	MADD c12, c12, a2, b6
	LD b6, 24 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 13 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 14 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 15 * SIZE(BO)
	MADD c51, c51, a4, b7
	NOP
	MADD c61, c61, a4, b2
	NOP
	MADD c71, c71, a4, b3
	NOP
	MADD c81, c81, a4, b4
	NOP
	MADD c52, c52, a2, b7
	LD b7, 28 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 17 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 18 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 19 * SIZE(BO)
	/* third k step: A values in a3 (row 1) and a2 (row 2) */
	MADD c11, c11, a3, b1
	LD a2, 5 * SIZE(AO)
	MADD c21, c21, a3, b2
	NOP
	MADD c31, c31, a3, b3
	NOP
	MADD c41, c41, a3, b4
	NOP
	MADD c12, c12, a2, b1
	LD b1, 32 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 21 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 22 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 23 * SIZE(BO)
	MADD c51, c51, a3, b5
	LD a4, 6 * SIZE(AO)
	MADD c61, c61, a3, b2
	NOP
	MADD c71, c71, a3, b3
	NOP
	MADD c81, c81, a3, b4
	LD a3, 12 * SIZE(AO)
	MADD c52, c52, a2, b5
	LD b5, 36 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 25 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 26 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 27 * SIZE(BO)
	/* fourth k step: A values in a4 (row 1) and a2 (row 2) */
	MADD c11, c11, a4, b6
	LD a2, 7 * SIZE(AO)
	MADD c21, c21, a4, b2
	NOP
	MADD c31, c31, a4, b3
	NOP
	MADD c41, c41, a4, b4
	daddiu L, L, -1
	MADD c12, c12, a2, b6
	LD b6, 40 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 29 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 30 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 31 * SIZE(BO)
	MADD c51, c51, a4, b7
	daddiu BO, BO, 32 * SIZE	/* offsets below are relative to the advanced BO */
	MADD c61, c61, a4, b2
	daddiu AO, AO, 8 * SIZE		/* offsets below are relative to the advanced AO */
	MADD c71, c71, a4, b3
	NOP
	MADD c81, c81, a4, b4
	NOP
	MADD c52, c52, a2, b7
	LD b7, 12 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 1 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 2 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 3 * SIZE(BO)
	/* row 1 x columns 1..4 of the next pass */
	MADD c11, c11, a1, b1
	LD a2, 1 * SIZE(AO)
	MADD c21, c21, a1, b2
	NOP
	MADD c31, c31, a1, b3
	bgtz L, .L12
	MADD c41, c41, a1, b4
	NOP
	.align 3

.L13:
	/*
	 * Drain copy of the body for the last unrolled pass: same products as
	 * .L12, but it does not decrement L and does not issue products for a
	 * following pass.
	 */
	MADD c12, c12, a2, b1
	LD b1, 16 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 5 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 6 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 7 * SIZE(BO)
	MADD c51, c51, a1, b5
	NOP
	MADD c61, c61, a1, b2
	LD a4, 2 * SIZE(AO)
	MADD c71, c71, a1, b3
	NOP
	MADD c81, c81, a1, b4
	LD a1, 8 * SIZE(AO)
	MADD c52, c52, a2, b5
	LD b5, 20 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 9 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 10 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 11 * SIZE(BO)
	MADD c11, c11, a4, b6
	LD a2, 3 * SIZE(AO)
	MADD c21, c21, a4, b2
	NOP
	MADD c31, c31, a4, b3
	NOP
	MADD c41, c41, a4, b4
	NOP
	MADD c12, c12, a2, b6
	LD b6, 24 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 13 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 14 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 15 * SIZE(BO)
	MADD c51, c51, a4, b7
	NOP
	MADD c61, c61, a4, b2
	NOP
	MADD c71, c71, a4, b3
	NOP
	MADD c81, c81, a4, b4
	NOP
	MADD c52, c52, a2, b7
	LD b7, 28 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 17 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 18 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 19 * SIZE(BO)
	MADD c11, c11, a3, b1
	LD a2, 5 * SIZE(AO)
	MADD c21, c21, a3, b2
	NOP
	MADD c31, c31, a3, b3
	NOP
	MADD c41, c41, a3, b4
	NOP
	MADD c12, c12, a2, b1
	LD b1, 32 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 21 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 22 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 23 * SIZE(BO)
	MADD c51, c51, a3, b5
	NOP
	MADD c61, c61, a3, b2
	LD a4, 6 * SIZE(AO)
	MADD c71, c71, a3, b3
	NOP
	MADD c81, c81, a3, b4
	LD a3, 12 * SIZE(AO)
	MADD c52, c52, a2, b5
	LD b5, 36 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 25 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 26 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 27 * SIZE(BO)
	MADD c11, c11, a4, b6
	LD a2, 7 * SIZE(AO)
	MADD c21, c21, a4, b2
	NOP
	MADD c31, c31, a4, b3
	NOP
	MADD c41, c41, a4, b4
	NOP
	MADD c12, c12, a2, b6
	LD b6, 40 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 29 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 30 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 31 * SIZE(BO)
	MADD c51, c51, a4, b7
	daddiu BO, BO, 32 * SIZE
	MADD c61, c61, a4, b2
	daddiu AO, AO, 8 * SIZE
	MADD c71, c71, a4, b3
	NOP
	MADD c81, c81, a4, b4
	NOP
	MADD c52, c52, a2, b7
	LD b7, 12 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 1 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 2 * SIZE(BO)
	MADD c82, c82, a2, b4
	LD b4, 3 * SIZE(BO)
	.align 3

.L15:
	/* K & 3 leftover steps for the 2 x 8 tile. */
	andi L, K, 3
	NOP
	blez L, .L18
	NOP
	.align 3

.L16:
	/* One k step: AO += 2, BO += 8; a1 and b1..b5 are reloaded for the next step. */
	MADD c11, c11, a1, b1
	LD a2, 1 * SIZE(AO)
	MADD c21, c21, a1, b2
	NOP
	MADD c31, c31, a1, b3
	NOP
	MADD c41, c41, a1, b4
	NOP
	MADD c12, c12, a2, b1
	LD b1, 8 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 5 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 6 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 7 * SIZE(BO)
	MADD c51, c51, a1, b5
	daddiu L, L, -1
	MADD c61, c61, a1, b2
	daddiu AO, AO, 2 * SIZE
	MADD c71, c71, a1, b3
	daddiu BO, BO, 8 * SIZE
	MADD c81, c81, a1, b4
	LD a1, 0 * SIZE(AO)
	MADD c52, c52, a2, b5
	LD b5, 4 * SIZE(BO)
	MADD c62, c62, a2, b2
	LD b2, 1 * SIZE(BO)
	MADD c72, c72, a2, b3
	LD b3, 2 * SIZE(BO)
	MADD c82, c82, a2, b4
	bgtz L, .L16
	LD b4, 3 * SIZE(BO)

.L18:
	/*
	 * Write back the 2 x 8 tile.  For column X: slots 0/1 of COX get
	 * ALPHA_R/ALPHA_I times cX1, slots 2/3 get ALPHA_R/ALPHA_I times cX2.
	 * $f0-$f7 (the a1, a2, b1..b6 registers) are reused as scratch; loads,
	 * MADDs and stores of neighbouring columns are interleaved.
	 */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 2 * SIZE(CO1)
	LD $f3, 3 * SIZE(CO1)
	LD $f4, 0 * SIZE(CO2)
	MADD $f0, $f0, ALPHA_R, c11
	LD $f5, 1 * SIZE(CO2)
	MADD $f1, $f1, ALPHA_I, c11
	LD $f6, 2 * SIZE(CO2)
	MADD $f2, $f2, ALPHA_R, c12
	LD $f7, 3 * SIZE(CO2)
	MADD $f3, $f3, ALPHA_I, c12
	MADD $f4, $f4, ALPHA_R, c21
	ST $f0, 0 * SIZE(CO1)
	MADD $f5, $f5, ALPHA_I, c21
	ST $f1, 1 * SIZE(CO1)
	MADD $f6, $f6, ALPHA_R, c22
	ST $f2, 2 * SIZE(CO1)
	MADD $f7, $f7, ALPHA_I, c22
	ST $f3, 3 * SIZE(CO1)
	LD $f0, 0 * SIZE(CO3)
	LD $f1, 1 * SIZE(CO3)
	LD $f2, 2 * SIZE(CO3)
	LD $f3, 3 * SIZE(CO3)
	ST $f4, 0 * SIZE(CO2)
	ST $f5, 1 * SIZE(CO2)
	ST $f6, 2 * SIZE(CO2)
	ST $f7, 3 * SIZE(CO2)
	LD $f4, 0 * SIZE(CO4)
	LD $f5, 1 * SIZE(CO4)
	LD $f6, 2 * SIZE(CO4)
	LD $f7, 3 * SIZE(CO4)
	MADD $f0, $f0, ALPHA_R, c31
	MADD $f1, $f1, ALPHA_I, c31
	MADD $f2, $f2, ALPHA_R, c32
	MADD $f3, $f3, ALPHA_I, c32
	MADD $f4, $f4, ALPHA_R, c41
	ST $f0, 0 * SIZE(CO3)
	MADD $f5, $f5, ALPHA_I, c41
	ST $f1, 1 * SIZE(CO3)
	MADD $f6, $f6, ALPHA_R, c42
	ST $f2, 2 * SIZE(CO3)
	MADD $f7, $f7, ALPHA_I, c42
	ST $f3, 3 * SIZE(CO3)
	LD $f0, 0 * SIZE(CO5)
	LD $f1, 1 * SIZE(CO5)
	LD $f2, 2 * SIZE(CO5)
	LD $f3, 3 * SIZE(CO5)
	ST $f4, 0 * SIZE(CO4)
	ST $f5, 1 * SIZE(CO4)
	ST $f6, 2 * SIZE(CO4)
	ST $f7, 3 * SIZE(CO4)
	LD $f4, 0 * SIZE(CO6)
	LD $f5, 1 * SIZE(CO6)
	LD $f6, 2 * SIZE(CO6)
	LD $f7, 3 * SIZE(CO6)
	/* CO1..CO4 are fully stored by now, so they can move to the next row block. */
	MADD $f0, $f0, ALPHA_R, c51
	daddiu CO1,CO1, 4 * SIZE
	MADD $f1, $f1, ALPHA_I, c51
	daddiu CO2,CO2, 4 * SIZE
	MADD $f2, $f2, ALPHA_R, c52
	daddiu CO3,CO3, 4 * SIZE
	MADD $f3, $f3, ALPHA_I, c52
	daddiu CO4,CO4, 4 * SIZE
	MADD $f4, $f4, ALPHA_R, c61
	ST $f0, 0 * SIZE(CO5)
	MADD $f5, $f5, ALPHA_I, c61
	ST $f1, 1 * SIZE(CO5)
	MADD $f6, $f6, ALPHA_R, c62
	ST $f2, 2 * SIZE(CO5)
	MADD $f7, $f7, ALPHA_I, c62
	ST $f3, 3 * SIZE(CO5)
	LD $f0, 0 * SIZE(CO7)
	LD $f1, 1 * SIZE(CO7)
	LD $f2, 2 * SIZE(CO7)
	LD $f3, 3 * SIZE(CO7)
	ST $f4, 0 * SIZE(CO6)
	ST $f5, 1 * SIZE(CO6)
	ST $f6, 2 * SIZE(CO6)
	ST $f7, 3 * SIZE(CO6)
	LD $f4, 0 * SIZE(CO8)
	daddiu I, I, -1
	LD $f5, 1 * SIZE(CO8)
	MTC $0, c11			/* c11 was consumed above; start clearing for the next tile */
	LD $f6, 2 * SIZE(CO8)
	LD $f7, 3 * SIZE(CO8)
	MADD $f0, $f0, ALPHA_R, c71
	daddiu CO5,CO5, 4 * SIZE
	MADD $f1, $f1, ALPHA_I, c71
	daddiu CO6,CO6, 4 * SIZE
	MADD $f2, $f2, ALPHA_R, c72
	daddiu CO7,CO7, 4 * SIZE
	MADD $f3, $f3, ALPHA_I, c72
	daddiu CO8,CO8, 4 * SIZE
	/* CO7/CO8 were already advanced, hence the negative store offsets. */
	MADD $f4, $f4, ALPHA_R, c81
	ST $f0, -4 * SIZE(CO7)
	MADD $f5, $f5, ALPHA_I, c81
	ST $f1, -3 * SIZE(CO7)
	MADD $f6, $f6, ALPHA_R, c82
	ST $f2, -2 * SIZE(CO7)
	MADD $f7, $f7, ALPHA_I, c82
	ST $f3, -1 * SIZE(CO7)
	ST $f4, -4 * SIZE(CO8)
	MOV c21, c11
	ST $f5, -3 * SIZE(CO8)
	MOV c31, c11
	ST $f6, -2 * SIZE(CO8)
	MOV c41, c11
	ST $f7, -1 * SIZE(CO8)
	MOV c51, c11
	bgtz I, .L11
	MOV c61, c11
	.align 3

.L20:
	/* Leftover single row (M & 1) of the 8-column block. */
	andi I, M, 1
	MOV c61, c11
	blez I, .L29
	MOV c71, c11
	LD a1, 0 * SIZE(AO)
	LD a2, 1 * SIZE(AO)
	LD a3, 2 * SIZE(AO)
	LD a4, 3 * SIZE(AO)
	LD b1, 0 * SIZE(B)
	LD b2, 1 * SIZE(B)
	LD b3, 2 * SIZE(B)
	LD b4, 3 * SIZE(B)
	LD b5, 4 * SIZE(B)
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	dsra L, K, 2
	MOV c81, c11
	blez L, .L25
	move BO, B
	.align 3

.L22:
	/* 1 x 8 tile, four k steps per pass (AO += 4, BO += 32); a1..a4 hold one step each. */
	MADD c11, c11, a1, b1
	LD b1, 16 * SIZE(BO)
	MADD c21, c21, a1, b2
	LD b2, 5 * SIZE(BO)
	MADD c31, c31, a1, b3
	LD b3, 6 * SIZE(BO)
	MADD c41, c41, a1, b4
	LD b4, 7 * SIZE(BO)
	MADD c51, c51, a1, b5
	LD b5, 20 * SIZE(BO)
	MADD c61, c61, a1, b2
	LD b2, 9 * SIZE(BO)
	MADD c71, c71, a1, b3
	LD b3, 10 * SIZE(BO)
	MADD c81, c81, a1, b4
	LD b4, 11 * SIZE(BO)
	LD a1, 4 * SIZE(AO)
	daddiu L, L, -1
	MADD c11, c11, a2, b6
	LD b6, 24 * SIZE(BO)
	MADD c21, c21, a2, b2
	LD b2, 13 * SIZE(BO)
	MADD c31, c31, a2, b3
	LD b3, 14 * SIZE(BO)
	MADD c41, c41, a2, b4
	LD b4, 15 * SIZE(BO)
	MADD c51, c51, a2, b7
	LD b7, 28 * SIZE(BO)
	MADD c61, c61, a2, b2
	LD b2, 17 * SIZE(BO)
	MADD c71, c71, a2, b3
	LD b3, 18 * SIZE(BO)
	MADD c81, c81, a2, b4
	LD b4, 19 * SIZE(BO)
	LD a2, 5 * SIZE(AO)
	daddiu AO, AO, 4 * SIZE
	MADD c11, c11, a3, b1
	LD b1, 32 * SIZE(BO)
	MADD c21, c21, a3, b2
	LD b2, 21 * SIZE(BO)
	MADD c31, c31, a3, b3
	LD b3, 22 * SIZE(BO)
	MADD c41, c41, a3, b4
	LD b4, 23 * SIZE(BO)
	MADD c51, c51, a3, b5
	LD b5, 36 * SIZE(BO)
	MADD c61, c61, a3, b2
	LD b2, 25 * SIZE(BO)
	MADD c71, c71, a3, b3
	LD b3, 26 * SIZE(BO)
	MADD c81, c81, a3, b4
	LD b4, 27 * SIZE(BO)
	LD a3, 2 * SIZE(AO)
	daddiu BO, BO, 32 * SIZE
	/* BO already advanced: negative offsets still address the current pass. */
	MADD c11, c11, a4, b6
	LD b6, 8 * SIZE(BO)
	MADD c21, c21, a4, b2
	LD b2, -3 * SIZE(BO)
	MADD c31, c31, a4, b3
	LD b3, -2 * SIZE(BO)
	MADD c41, c41, a4, b4
	LD b4, -1 * SIZE(BO)
	MADD c51, c51, a4, b7
	LD b7, 12 * SIZE(BO)
	MADD c61, c61, a4, b2
	LD b2, 1 * SIZE(BO)
	MADD c71, c71, a4, b3
	LD b3, 2 * SIZE(BO)
	MADD c81, c81, a4, b4
	LD b4, 3 * SIZE(BO)
	bgtz L, .L22
	LD a4, 3 * SIZE(AO)
	.align 3

.L25:
	/* K & 3 leftover steps for the 1 x 8 tile. */
	andi L, K, 3
	NOP
	blez L, .L28
	NOP
	.align 3

.L26:
	MADD c11, c11, a1, b1
	LD b1, 8 * SIZE(BO)
	MADD c21, c21, a1, b2
	LD b2, 5 * SIZE(BO)
	MADD c31, c31, a1, b3
	LD b3, 6 * SIZE(BO)
	MADD c41, c41, a1, b4
	LD b4, 7 * SIZE(BO)
	daddiu L, L, -1
	MOV a2, a2			/* self-move: no change to a2 */
	daddiu AO, AO, 1 * SIZE
	daddiu BO, BO, 8 * SIZE
	MADD c51, c51, a1, b5
	LD b5, 4 * SIZE(BO)
	MADD c61, c61, a1, b2
	LD b2, 1 * SIZE(BO)
	MADD c71, c71, a1, b3
	LD b3, 2 * SIZE(BO)
	MADD c81, c81, a1, b4
	LD a1, 0 * SIZE(AO)
	bgtz L, .L26
	LD b4, 3 * SIZE(BO)

.L28:
	/* Write back the 1 x 8 tile: slots 0/1 of COX get ALPHA_R/ALPHA_I times cX1. */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 0 * SIZE(CO2)
	LD $f3, 1 * SIZE(CO2)
	LD $f4, 0 * SIZE(CO3)
	MADD $f0, $f0, ALPHA_R, c11
	LD $f5, 1 * SIZE(CO3)
	MADD $f1, $f1, ALPHA_I, c11
	LD $f6, 0 * SIZE(CO4)
	MADD $f2, $f2, ALPHA_R, c21
	LD $f7, 1 * SIZE(CO4)
	MADD $f3, $f3, ALPHA_I, c21
	MADD $f4, $f4, ALPHA_R, c31
	ST $f0, 0 * SIZE(CO1)
	MADD $f5, $f5, ALPHA_I, c31
	ST $f1, 1 * SIZE(CO1)
	MADD $f6, $f6, ALPHA_R, c41
	ST $f2, 0 * SIZE(CO2)
	MADD $f7, $f7, ALPHA_I, c41
	ST $f3, 1 * SIZE(CO2)
	LD $f0, 0 * SIZE(CO5)
	LD $f1, 1 * SIZE(CO5)
	LD $f2, 0 * SIZE(CO6)
	LD $f3, 1 * SIZE(CO6)
	ST $f4, 0 * SIZE(CO3)
	ST $f5, 1 * SIZE(CO3)
	ST $f6, 0 * SIZE(CO4)
	ST $f7, 1 * SIZE(CO4)
	LD $f4, 0 * SIZE(CO7)
	MADD $f0, $f0, ALPHA_R, c51
	LD $f5, 1 * SIZE(CO7)
	MADD $f1, $f1, ALPHA_I, c51
	LD $f6, 0 * SIZE(CO8)
	MADD $f2, $f2, ALPHA_R, c61
	LD $f7, 1 * SIZE(CO8)
	MADD $f3, $f3, ALPHA_I, c61
	MADD $f4, $f4, ALPHA_R, c71
	ST $f0, 0 * SIZE(CO5)
	MADD $f5, $f5, ALPHA_I, c71
	ST $f1, 1 * SIZE(CO5)
	MADD $f6, $f6, ALPHA_R, c81
	ST $f2, 0 * SIZE(CO6)
	MADD $f7, $f7, ALPHA_I, c81
	ST $f3, 1 * SIZE(CO6)
	ST $f4, 0 * SIZE(CO7)
	ST $f5, 1 * SIZE(CO7)
	ST $f6, 0 * SIZE(CO8)
	ST $f7, 1 * SIZE(CO8)
	.align 3

.L29:
	/*
	 * Next 8-column block.  B moves to where BO stopped, i.e. past the
	 * B panel just consumed.
	 * NOTE(review): BO is only assigned inside the row-block paths above;
	 * if both M >> 1 and M & 1 are zero it is copied here without ever
	 * having been set -- confirm callers never pass M == 0.
	 */
	bgtz J, .L10
	move B, BO			/* delay slot */
	.align 3

.L30:
	/* ---------------- column block of 4 (N & 4) ---------------- */
	andi J, N, 4
	blez J, .L50
	move AO, A
	move CO1, C
	MTC $0, c11
	daddu CO2, C, LDC
	daddu CO3, CO2, LDC
	daddu CO4, CO3, LDC
	MOV c21, c11
	daddu C, CO4, LDC
	MOV c31, c11
	dsra I, M, 1
	blez I, .L40
	MOV c41, c11

.L31:
	/* 2 x 4 tile: clear row-2 accumulators, preload, L = K >> 2. */
	LD a1, 0 * SIZE(AO)
	LD a3, 4 * SIZE(AO)
	LD b1, 0 * SIZE(B)
	MOV c12, c11
	LD b2, 1 * SIZE(B)
	MOV c22, c11
	LD b3, 2 * SIZE(B)
	MOV c32, c11
	LD b4, 3 * SIZE(B)
	MOV c42, c11
	LD b5, 4 * SIZE(B)
	dsra L, K, 2
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	blez L, .L35
	move BO, B
	.align 3

.L32:
	/* Four k steps per pass (AO += 8, BO += 16). */
	MADD c11, c11, a1, b1
	LD a2, 1 * SIZE(AO)
	MADD c21, c21, a1, b2
	daddiu L, L, -1
	MADD c31, c31, a1, b3
	NOP
	MADD c41, c41, a1, b4
	LD a1, 2 * SIZE(AO)
	MADD c12, c12, a2, b1
	LD b1, 16 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 5 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 6 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 7 * SIZE(BO)
	MADD c11, c11, a1, b5
	LD a2, 3 * SIZE(AO)
	MADD c21, c21, a1, b2
	NOP
	MADD c31, c31, a1, b3
	NOP
	MADD c41, c41, a1, b4
	LD a1, 8 * SIZE(AO)
	MADD c12, c12, a2, b5
	LD b5, 20 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 9 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 10 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 11 * SIZE(BO)
	MADD c11, c11, a3, b6
	LD a2, 5 * SIZE(AO)
	MADD c21, c21, a3, b2
	NOP
	MADD c31, c31, a3, b3
	NOP
	MADD c41, c41, a3, b4
	LD a3, 6 * SIZE(AO)
	MADD c12, c12, a2, b6
	LD b6, 24 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 13 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 14 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 15 * SIZE(BO)
	MADD c11, c11, a3, b7
	LD a2, 7 * SIZE(AO)
	MADD c21, c21, a3, b2
	daddiu AO, AO, 8 * SIZE
	MADD c31, c31, a3, b3
	daddiu BO, BO, 16 * SIZE
	MADD c41, c41, a3, b4
	LD a3, 4 * SIZE(AO)
	MADD c12, c12, a2, b7
	LD b7, 12 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 1 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 2 * SIZE(BO)
	MADD c42, c42, a2, b4
	NOP
	bgtz L, .L32
	LD b4, 3 * SIZE(BO)
	.align 3

.L35:
	/* K & 3 leftover steps for the 2 x 4 tile. */
	andi L, K, 3
	NOP
	blez L, .L38
	NOP
	.align 3

.L36:
	MADD c11, c11, a1, b1
	LD a2, 1 * SIZE(AO)
	MADD c21, c21, a1, b2
	daddiu L, L, -1
	MADD c31, c31, a1, b3
	daddiu AO, AO, 2 * SIZE
	MADD c41, c41, a1, b4
	LD a1, 0 * SIZE(AO)
	MADD c12, c12, a2, b1
	LD b1, 4 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 5 * SIZE(BO)
	MADD c32, c32, a2, b3
	LD b3, 6 * SIZE(BO)
	MADD c42, c42, a2, b4
	LD b4, 7 * SIZE(BO)
	bgtz L, .L36
	daddiu BO, BO, 4 * SIZE

.L38:
	/* Write back the 2 x 4 tile (same slot layout as .L18). */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 2 * SIZE(CO1)
	LD $f3, 3 * SIZE(CO1)
	LD $f4, 0 * SIZE(CO2)
	LD $f5, 1 * SIZE(CO2)
	LD $f6, 2 * SIZE(CO2)
	LD $f7, 3 * SIZE(CO2)
	MADD $f0, $f0, ALPHA_R, c11
	MADD $f1, $f1, ALPHA_I, c11
	MADD $f2, $f2, ALPHA_R, c12
	MADD $f3, $f3, ALPHA_I, c12
	MADD $f4, $f4, ALPHA_R, c21
	ST $f0, 0 * SIZE(CO1)
	MADD $f5, $f5, ALPHA_I, c21
	ST $f1, 1 * SIZE(CO1)
	MADD $f6, $f6, ALPHA_R, c22
	ST $f2, 2 * SIZE(CO1)
	MADD $f7, $f7, ALPHA_I, c22
	ST $f3, 3 * SIZE(CO1)
	LD $f0, 0 * SIZE(CO3)
	LD $f1, 1 * SIZE(CO3)
	LD $f2, 2 * SIZE(CO3)
	LD $f3, 3 * SIZE(CO3)
	ST $f4, 0 * SIZE(CO2)
	MADD $f0, $f0, ALPHA_R, c31
	ST $f5, 1 * SIZE(CO2)
	MADD $f1, $f1, ALPHA_I, c31
	ST $f6, 2 * SIZE(CO2)
	MADD $f2, $f2, ALPHA_R, c32
	ST $f7, 3 * SIZE(CO2)
	MADD $f3, $f3, ALPHA_I, c32
	LD $f4, 0 * SIZE(CO4)
	LD $f5, 1 * SIZE(CO4)
	LD $f6, 2 * SIZE(CO4)
	LD $f7, 3 * SIZE(CO4)
	MADD $f4, $f4, ALPHA_R, c41
	daddiu CO1,CO1, 4 * SIZE
	MADD $f5, $f5, ALPHA_I, c41
	daddiu CO2,CO2, 4 * SIZE
	MADD $f6, $f6, ALPHA_R, c42
	daddiu CO3,CO3, 4 * SIZE
	MADD $f7, $f7, ALPHA_I, c42
	daddiu CO4,CO4, 4 * SIZE
	/* CO3/CO4 were already advanced, hence the negative store offsets. */
	ST $f0, -4 * SIZE(CO3)
	daddiu I, I, -1
	ST $f1, -3 * SIZE(CO3)
	ST $f2, -2 * SIZE(CO3)
	ST $f3, -1 * SIZE(CO3)
	ST $f4, -4 * SIZE(CO4)
	MTC $0, c11
	ST $f5, -3 * SIZE(CO4)
	MOV c21, c11
	ST $f6, -2 * SIZE(CO4)
	MOV c31, c11
	ST $f7, -1 * SIZE(CO4)
	bgtz I, .L31
	MOV c41, c11
	.align 3

.L40:
	/* Leftover single row (M & 1) of the 4-column block. */
	andi I, M, 1
	blez I, .L49
	MOV c61, c11
	LD a1, 0 * SIZE(AO)
	MOV c71, c11
	LD a2, 1 * SIZE(AO)
	MOV c81, c11
	LD b1, 0 * SIZE(B)
	LD b2, 1 * SIZE(B)
	LD b3, 2 * SIZE(B)
	LD b4, 3 * SIZE(B)
	LD b5, 4 * SIZE(B)
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	dsra L, K, 2
	blez L, .L45
	move BO, B
	.align 3

.L42:
	/* 1 x 4 tile, four k steps per pass (AO += 4, BO += 16). */
	MADD c11, c11, a1, b1
	LD b1, 16 * SIZE(BO)
	MADD c21, c21, a1, b2
	LD b2, 5 * SIZE(BO)
	MADD c31, c31, a1, b3
	LD b3, 6 * SIZE(BO)
	MADD c41, c41, a1, b4
	LD b4, 7 * SIZE(BO)
	LD a1, 4 * SIZE(AO)
	daddiu L, L, -1
	MADD c11, c11, a2, b5
	LD b5, 20 * SIZE(BO)
	MADD c21, c21, a2, b2
	LD b2, 9 * SIZE(BO)
	MADD c31, c31, a2, b3
	LD b3, 10 * SIZE(BO)
	MADD c41, c41, a2, b4
	LD b4, 11 * SIZE(BO)
	LD a2, 2 * SIZE(AO)
	daddiu AO, AO, 4 * SIZE
	MADD c11, c11, a2, b6
	LD b6, 24 * SIZE(BO)
	MADD c21, c21, a2, b2
	LD b2, 13 * SIZE(BO)
	MADD c31, c31, a2, b3
	LD b3, 14 * SIZE(BO)
	MADD c41, c41, a2, b4
	LD b4, 15 * SIZE(BO)
	LD a2, -1 * SIZE(AO)		/* AO already advanced: this is element 3 of the current pass */
	daddiu BO, BO, 16 * SIZE
	MADD c11, c11, a2, b7
	LD b7, 12 * SIZE(BO)
	MADD c21, c21, a2, b2
	LD b2, 1 * SIZE(BO)
	MADD c31, c31, a2, b3
	LD b3, 2 * SIZE(BO)
	MADD c41, c41, a2, b4
	LD b4, 3 * SIZE(BO)
	bgtz L, .L42
	LD a2, 1 * SIZE(AO)
	.align 3

.L45:
	/* K & 3 leftover steps for the 1 x 4 tile. */
	andi L, K, 3
	NOP
	blez L, .L48
	NOP
	.align 3

.L46:
	MADD c11, c11, a1, b1
	LD b1, 4 * SIZE(BO)
	MADD c21, c21, a1, b2
	LD b2, 5 * SIZE(BO)
	MADD c31, c31, a1, b3
	LD b3, 6 * SIZE(BO)
	MADD c41, c41, a1, b4
	LD a1, 1 * SIZE(AO)
	LD b4, 7 * SIZE(BO)
	daddiu L, L, -1
	daddiu AO, AO, 1 * SIZE
	MOV a2, a2			/* self-move: no change to a2 */
	bgtz L, .L46
	daddiu BO, BO, 4 * SIZE

.L48:
	/* Write back the 1 x 4 tile. */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 0 * SIZE(CO2)
	LD $f3, 1 * SIZE(CO2)
	LD $f4, 0 * SIZE(CO3)
	MADD $f0, $f0, ALPHA_R, c11
	LD $f5, 1 * SIZE(CO3)
	MADD $f1, $f1, ALPHA_I, c11
	LD $f6, 0 * SIZE(CO4)
	MADD $f2, $f2, ALPHA_R, c21
	LD $f7, 1 * SIZE(CO4)
	MADD $f3, $f3, ALPHA_I, c21
	MADD $f4, $f4, ALPHA_R, c31
	ST $f0, 0 * SIZE(CO1)
	MADD $f5, $f5, ALPHA_I, c31
	ST $f1, 1 * SIZE(CO1)
	MADD $f6, $f6, ALPHA_R, c41
	ST $f2, 0 * SIZE(CO2)
	MADD $f7, $f7, ALPHA_I, c41
	ST $f3, 1 * SIZE(CO2)
	ST $f4, 0 * SIZE(CO3)
	ST $f5, 1 * SIZE(CO3)
	ST $f6, 0 * SIZE(CO4)
	ST $f7, 1 * SIZE(CO4)
	.align 3

.L49:
	/* B moves past the 4-column panel (same BO caveat as at .L29). */
	move B, BO
	.align 3

.L50:
	/* ---------------- column block of 2 (N & 2) ---------------- */
	andi J, N, 2
	blez J, .L70
	move AO, A
	move CO1, C
	daddu CO2, C, LDC
	dsra I, M, 1
	blez I, .L60
	daddu C, CO2, LDC

.L51:
	/*
	 * 2 x 2 tile: clear c11/c21/c12/c22 and preload.
	 * NOTE(review): b6 and b7 loaded here are not read by .L52/.L56/.L58.
	 */
	LD a1, 0 * SIZE(AO)
	MTC $0, c11
	LD a2, 1 * SIZE(AO)
	MOV c21, c11
	LD a5, 4 * SIZE(AO)
	LD b1, 0 * SIZE(B)
	MOV c12, c11
	LD b2, 1 * SIZE(B)
	MOV c22, c11
	LD b3, 2 * SIZE(B)
	LD b5, 4 * SIZE(B)
	dsra L, K, 2
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	blez L, .L55
	move BO, B
	.align 3

.L52:
	/* Four k steps per pass (AO += 8, BO += 8). */
	MADD c11, c11, a1, b1
	LD a3, 2 * SIZE(AO)
	MADD c21, c21, a1, b2
	LD b4, 3 * SIZE(BO)
	MADD c12, c12, a2, b1
	LD a4, 3 * SIZE(AO)
	MADD c22, c22, a2, b2
	LD b1, 8 * SIZE(BO)
	MADD c11, c11, a3, b3
	LD a1, 8 * SIZE(AO)
	MADD c21, c21, a3, b4
	LD b2, 5 * SIZE(BO)
	MADD c12, c12, a4, b3
	LD a2, 5 * SIZE(AO)
	MADD c22, c22, a4, b4
	LD b3, 6 * SIZE(BO)
	MADD c11, c11, a5, b5
	LD a3, 6 * SIZE(AO)
	MADD c21, c21, a5, b2
	LD b4, 7 * SIZE(BO)
	MADD c12, c12, a2, b5
	LD a4, 7 * SIZE(AO)
	MADD c22, c22, a2, b2
	LD b5, 12 * SIZE(BO)
	MADD c11, c11, a3, b3
	LD a5, 12 * SIZE(AO)
	MADD c21, c21, a3, b4
	LD b2, 9 * SIZE(BO)
	MADD c12, c12, a4, b3
	LD a2, 9 * SIZE(AO)
	MADD c22, c22, a4, b4
	LD b3, 10 * SIZE(BO)
	daddiu AO, AO, 8 * SIZE
	daddiu L, L, -1
	bgtz L, .L52
	daddiu BO, BO, 8 * SIZE
	.align 3

.L55:
	/* K & 3 leftover steps for the 2 x 2 tile. */
	andi L, K, 3
	NOP
	blez L, .L58
	NOP
	.align 3

.L56:
	MADD c11, c11, a1, b1
	LD a2, 1 * SIZE(AO)
	MADD c21, c21, a1, b2
	LD a1, 2 * SIZE(AO)
	MADD c12, c12, a2, b1
	LD b1, 2 * SIZE(BO)
	MADD c22, c22, a2, b2
	LD b2, 3 * SIZE(BO)
	daddiu L, L, -1
	daddiu AO, AO, 2 * SIZE
	bgtz L, .L56
	daddiu BO, BO, 2 * SIZE

.L58:
	/* Write back the 2 x 2 tile; stores use negative offsets after CO1/CO2 advance. */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 2 * SIZE(CO1)
	LD $f3, 3 * SIZE(CO1)
	LD $f4, 0 * SIZE(CO2)
	LD $f5, 1 * SIZE(CO2)
	LD $f6, 2 * SIZE(CO2)
	LD $f7, 3 * SIZE(CO2)
	MADD $f0, $f0, ALPHA_R, c11
	daddiu I, I, -1
	MADD $f1, $f1, ALPHA_I, c11
	daddiu CO1,CO1, 4 * SIZE
	MADD $f2, $f2, ALPHA_R, c12
	daddiu CO2,CO2, 4 * SIZE
	MADD $f3, $f3, ALPHA_I, c12
	MADD $f4, $f4, ALPHA_R, c21
	MADD $f5, $f5, ALPHA_I, c21
	MADD $f6, $f6, ALPHA_R, c22
	MADD $f7, $f7, ALPHA_I, c22
	ST $f0, -4 * SIZE(CO1)
	ST $f1, -3 * SIZE(CO1)
	ST $f2, -2 * SIZE(CO1)
	ST $f3, -1 * SIZE(CO1)
	ST $f4, -4 * SIZE(CO2)
	ST $f5, -3 * SIZE(CO2)
	ST $f6, -2 * SIZE(CO2)
	bgtz I, .L51
	ST $f7, -1 * SIZE(CO2)
	.align 3

.L60:
	/*
	 * Leftover single row (M & 1) of the 2-column block.  Two accumulator
	 * pairs (c11/c21 and c31/c41) are used and summed at .L68.
	 * NOTE(review): b5, b6 and b7 loaded here are not read by
	 * .L62/.L66/.L68.
	 */
	andi I, M, 1
	blez I, .L69
	NOP
	dsra L, K, 2
	LD a1, 0 * SIZE(AO)
	MTC $0, c11
	LD a2, 1 * SIZE(AO)
	MOV c21, c11
	LD a3, 2 * SIZE(AO)
	MOV c31, c11
	LD a4, 3 * SIZE(AO)
	MOV c41, c11
	LD b1, 0 * SIZE(B)
	LD b2, 1 * SIZE(B)
	LD b3, 2 * SIZE(B)
	LD b4, 3 * SIZE(B)
	LD b5, 4 * SIZE(B)
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	blez L, .L65
	move BO, B
	.align 3

.L62:
	/* Four k steps per pass (AO += 4, BO += 8); odd steps go to c31/c41. */
	MADD c11, c11, a1, b1
	LD b1, 4 * SIZE(BO)
	MADD c21, c21, a1, b2
	LD b2, 5 * SIZE(BO)
	MADD c31, c31, a2, b3
	LD b3, 6 * SIZE(BO)
	MADD c41, c41, a2, b4
	LD b4, 7 * SIZE(BO)
	LD a1, 4 * SIZE(AO)
	LD a2, 5 * SIZE(AO)
	MADD c11, c11, a3, b1
	LD b1, 8 * SIZE(BO)
	MADD c21, c21, a3, b2
	LD b2, 9 * SIZE(BO)
	MADD c31, c31, a4, b3
	LD b3, 10 * SIZE(BO)
	MADD c41, c41, a4, b4
	LD b4, 11 * SIZE(BO)
	LD a3, 6 * SIZE(AO)
	LD a4, 7 * SIZE(AO)
	daddiu L, L, -1
	daddiu AO, AO, 4 * SIZE
	bgtz L, .L62
	daddiu BO, BO, 8 * SIZE
	.align 3

.L65:
	/* K & 3 leftover steps for the 1 x 2 tile. */
	andi L, K, 3
	NOP
	blez L, .L68
	NOP
	.align 3

.L66:
	MADD c11, c11, a1, b1
	LD b1, 2 * SIZE(BO)
	MADD c21, c21, a1, b2
	LD b2, 3 * SIZE(BO)
	LD a1, 1 * SIZE(AO)
	daddiu L, L, -1
	daddiu AO, AO, 1 * SIZE
	bgtz L, .L66
	daddiu BO, BO, 2 * SIZE

.L68:
	/* Fold the two partial sums per column, then write back the 1 x 2 tile. */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 0 * SIZE(CO2)
	LD $f3, 1 * SIZE(CO2)
	ADD c11, c11, c31
	ADD c21, c21, c41
	MADD $f0, $f0, ALPHA_R, c11
	MADD $f1, $f1, ALPHA_I, c11
	MADD $f2, $f2, ALPHA_R, c21
	MADD $f3, $f3, ALPHA_I, c21
	ST $f0, 0 * SIZE(CO1)
	ST $f1, 1 * SIZE(CO1)
	ST $f2, 0 * SIZE(CO2)
	ST $f3, 1 * SIZE(CO2)
	.align 3

.L69:
	/* B moves past the 2-column panel (same BO caveat as at .L29). */
	move B, BO
	.align 3

.L70:
	/* ---------------- last single column (N & 1) ---------------- */
	andi J, N, 1
	blez J, .L999
	move AO, A
	move CO1, C
	dsra I, M, 1
	blez I, .L80
	daddu C, CO1, LDC

.L71:
	/*
	 * 2 x 1 tile: clear accumulators, L = K >> 2.
	 * NOTE(review): .L72 and .L76 reload a1, a2 and b1 themselves, so none
	 * of the A/B values loaded in this block are consumed; c21 and c22
	 * stay zero and are only added in at .L78.
	 */
	LD a1, 0 * SIZE(AO)
	MTC $0, c11
	LD a2, 1 * SIZE(AO)
	MOV c21, c11
	LD a5, 4 * SIZE(AO)
	LD b1, 0 * SIZE(B)
	MOV c12, c11
	LD b2, 1 * SIZE(B)
	MOV c22, c11
	LD b3, 2 * SIZE(B)
	LD b5, 4 * SIZE(B)
	dsra L, K, 2
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	blez L, .L75
	move BO, B
	.align 3

.L72:
	/* Four k steps per pass (AO += 8, BO += 4), each step loading its own operands. */
	LD a1, 0 * SIZE(AO)
	LD a2, 1 * SIZE(AO)
	LD b1, 0 * SIZE(BO)
	MADD c11, c11, a1, b1
	MADD c12, c12, a2, b1
	LD a1, 2 * SIZE(AO)
	LD a2, 3 * SIZE(AO)
	LD b1, 1 * SIZE(BO)
	MADD c11, c11, a1, b1
	MADD c12, c12, a2, b1
	LD a1, 4 * SIZE(AO)
	LD a2, 5 * SIZE(AO)
	LD b1, 2 * SIZE(BO)
	MADD c11, c11, a1, b1
	MADD c12, c12, a2, b1
	LD a1, 6 * SIZE(AO)
	LD a2, 7 * SIZE(AO)
	LD b1, 3 * SIZE(BO)
	MADD c11, c11, a1, b1
	MADD c12, c12, a2, b1
	daddiu L, L, -1
	daddiu AO, AO, 8 * SIZE
	bgtz L, .L72
	daddiu BO, BO, 4 * SIZE
	.align 3

.L75:
	/* K & 3 leftover steps for the 2 x 1 tile. */
	andi L, K, 3
	NOP
	blez L, .L78
	NOP
	.align 3

.L76:
	LD a1, 0 * SIZE(AO)
	LD a2, 1 * SIZE(AO)
	LD b1, 0 * SIZE(BO)
	MADD c11, c11, a1, b1
	MADD c12, c12, a2, b1
	daddiu L, L, -1
	daddiu AO, AO, 2 * SIZE
	bgtz L, .L76
	daddiu BO, BO, 1 * SIZE

.L78:
	/* Write back the 2 x 1 tile; stores use negative offsets after CO1 advances. */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	LD $f2, 2 * SIZE(CO1)
	LD $f3, 3 * SIZE(CO1)
	ADD c11, c11, c21
	daddiu I, I, -1
	ADD c12, c12, c22
	daddiu CO1,CO1, 4 * SIZE
	MADD $f0, $f0, ALPHA_R, c11
	MADD $f1, $f1, ALPHA_I, c11
	MADD $f2, $f2, ALPHA_R, c12
	MADD $f3, $f3, ALPHA_I, c12
	ST $f0, -4 * SIZE(CO1)
	ST $f1, -3 * SIZE(CO1)
	ST $f2, -2 * SIZE(CO1)
	bgtz I, .L71
	ST $f3, -1 * SIZE(CO1)
	.align 3

.L80:
	/*
	 * Leftover single row (M & 1) of the last column: a plain dot product
	 * split over c11 (even steps) and c21 (odd steps).
	 * NOTE(review): .L82 and .L86 reload a1 and b1 themselves, so none of
	 * the A/B values loaded in this block are consumed.
	 */
	andi I, M, 1
	blez I, .L89
	NOP
	LD a1, 0 * SIZE(AO)
	MTC $0, c11
	LD a2, 1 * SIZE(AO)
	MOV c21, c11
	LD a3, 2 * SIZE(AO)
	LD a4, 3 * SIZE(AO)
	LD b1, 0 * SIZE(B)
	LD b2, 1 * SIZE(B)
	LD b3, 2 * SIZE(B)
	LD b4, 3 * SIZE(B)
	LD b5, 4 * SIZE(B)
	LD b6, 8 * SIZE(B)
	LD b7, 12 * SIZE(B)
	dsra L, K, 2
	blez L, .L85
	move BO, B
	.align 3

.L82:
	/* Four k steps per pass (AO += 4, BO += 4). */
	LD a1, 0 * SIZE(AO)
	LD b1, 0 * SIZE(BO)
	MADD c11, c11, a1, b1
	LD a1, 1 * SIZE(AO)
	LD b1, 1 * SIZE(BO)
	MADD c21, c21, a1, b1
	LD a1, 2 * SIZE(AO)
	LD b1, 2 * SIZE(BO)
	MADD c11, c11, a1, b1
	LD a1, 3 * SIZE(AO)
	LD b1, 3 * SIZE(BO)
	MADD c21, c21, a1, b1
	daddiu L, L, -1
	daddiu AO, AO, 4 * SIZE
	bgtz L, .L82
	daddiu BO, BO, 4 * SIZE
	.align 3

.L85:
	/* K & 3 leftover steps for the 1 x 1 tile. */
	andi L, K, 3
	NOP
	blez L, .L88
	NOP
	.align 3

.L86:
	LD a1, 0 * SIZE(AO)
	LD b1, 0 * SIZE(BO)
	MADD c11, c11, a1, b1
	daddiu L, L, -1
	daddiu AO, AO, 1 * SIZE
	bgtz L, .L86
	daddiu BO, BO, 1 * SIZE

.L88:
	/* Fold the two partial sums and write back the single element. */
	LD $f0, 0 * SIZE(CO1)
	LD $f1, 1 * SIZE(CO1)
	ADD c11, c11, c21
	MADD $f0, $f0, ALPHA_R, c11
	MADD $f1, $f1, ALPHA_I, c11
	ST $f0, 0 * SIZE(CO1)
	ST $f1, 1 * SIZE(CO1)
	.align 3

.L89:
	/* B moves past the 1-column panel (same BO caveat as at .L29). */
	move B, BO
	.align 3

.L999:
	/* Restore the registers saved in the prologue and return. */
	LDARG $16, 0($sp)
	LDARG $17, 8($sp)
	LDARG $18, 16($sp)
	LDARG $19, 24($sp)
	LDARG $20, 32($sp)
	LDARG $21, 40($sp)
	ldc1 $f24, 48($sp)
	ldc1 $f25, 56($sp)
	ldc1 $f26, 64($sp)
	ldc1 $f27, 72($sp)
	ldc1 $f28, 80($sp)
	ldc1 $f29, 88($sp)
	j $31
	daddiu $sp, $sp, 128		/* delay slot: release the 128-byte frame */

	EPILOGUE