/*
 * Driver loops for the DTRSM "LT" kernel.
 *
 * Structure visible in this section:
 *   - column panels of 4 (J = N >> 2 iterations), then an optional
 *     panel of 2 (N & 2) and an optional panel of 1 (N & 1);
 *   - inside each panel, row tiles of 16 (I = M >> 4 iterations),
 *     then optional tiles of 8, 4, 2 and 1 selected by the bits of M.
 *
 * Every tile follows the same pattern:
 *   INIT_mxn                  - macro defined elsewhere
 *   KERNEL_mxn, KK times      - skipped entirely when KK <= 0
 *   SOLVE_LT_mxn              - macro defined elsewhere
 *   CO += m*SIZE
 *   AO += (K - KK) << (log2(m) + BASE_SHIFT)
 *   BO += (K - KK) << (log2(n) + BASE_SHIFT)
 *   KK += m
 *
 * All register names (J, I, L, T1..T4, AO, BO, CO, KK, ...) are aliases
 * defined outside this section, as is the exit label L999.
 *
 * NOTE(review): presumably SIZE is the element size in bytes,
 * BASE_SHIFT is log2(SIZE), and LDC is already scaled to bytes -
 * confirm against the including file.
 * NOTE(review): slwi is a 32-bit shift, so the byte offsets built with
 * it are limited to 32 bits - confirm this is acceptable for the
 * supported problem sizes.
 */

/* ================================================================== */
/* Column panels of width 4                                            */
/* ================================================================== */

	srawi.	J, N, 2				/* J = N / 4, sets CR0 */
	ble	DSTRM_LT_L4_END			/* no full 4-column panel */

DSTRM_LT_L4_BEGIN:

	mr	CO, C				/* CO = start of this C panel */
	mr	AO, A				/* restart A for every panel */
	slwi	T1, LDC, 2			/* T1 = 4 * LDC */
	add	C, C, T1			/* C -> next 4-column panel */

	mr	KK, OFFSET			/* reset KK for this panel */
	srawi.	I, M, 4				/* I = M / 16, sets CR0 */
	ble	DSTRM_LT_L4x16_END		/* no full 16-row tile */

/* ---------------- 16 x 4 tiles ---------------- */

DSTRM_LT_L4x16_BEGIN:

	mr	BO, B				/* B is re-read from its start for each tile */

	/*
	 * Touch the four C columns of this tile: T1..T4 are CO, CO+LDC,
	 * CO+2*LDC and CO+3*LDC, each rounded down to a 128-byte boundary
	 * (L holds the mask -128). dcbt is issued for that address and for
	 * the one 128 bytes further on.
	 */
	li	L, -128

	mr	T1, CO
	add	T2, T1, LDC
	add	T3, T2, LDC
	add	T4, T3, LDC

	and	T1, T1, L
	and	T2, T2, L
	and	T3, T3, L
	and	T4, T4, L

	dcbt	T1, r0
	dcbt	T2, r0
	dcbt	T3, r0
	dcbt	T4, r0

	addi	T1, T1, 128
	addi	T2, T2, 128
	addi	T3, T3, 128
	addi	T4, T4, 128

	dcbt	T1, r0
	dcbt	T2, r0
	dcbt	T3, r0
	dcbt	T4, r0

DSTRM_LT_L4x16_LOOP_START:

	INIT_16x4

	addic.	L, KK, 0			/* L = KK, sets CR0 */
	ble-	DSTRM_LT_L4x16_SAVE		/* KK <= 0: nothing to accumulate */
	mtctr	L				/* CTR counts the KERNEL_16x4 calls */

	/*
	 * Loop body unrolled four times. CTR is decremented after every
	 * KERNEL_16x4, and the loop leaves as soon as it reaches zero.
	 * BO is prefetched only once per unrolled pass.
	 */
DSTRM_LT_L4x16_LOOP:

	dcbt	AO, PRE
	dcbt	BO, PRE
	KERNEL_16x4
	bdz-	DSTRM_LT_L4x16_SAVE

	dcbt	AO, PRE
	KERNEL_16x4
	bdz-	DSTRM_LT_L4x16_SAVE

	dcbt	AO, PRE
	KERNEL_16x4
	bdz-	DSTRM_LT_L4x16_SAVE

	dcbt	AO, PRE
	KERNEL_16x4
	bdnz+	DSTRM_LT_L4x16_LOOP

DSTRM_LT_L4x16_SAVE:

	SOLVE_LT_16x4

	addi	CO, CO, 16*SIZE			/* next tile, 16 elements down */

	sub	T3, K, KK			/* T3 = T4 = K - KK */
	sub	T4, K, KK
	slwi	T3, T3, 4+BASE_SHIFT		/* scale by 16 elements (A side) */
	slwi	T4, T4, 2+BASE_SHIFT		/* scale by 4 elements (B side) */
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 16

	addic.	I, I, -1
	bgt	DSTRM_LT_L4x16_BEGIN

DSTRM_LT_L4x16_END:

/* ---------------- 8 x 4 tile ---------------- */

DSTRM_LT_L4x8_BEGIN:

	andi.	T2, M, 15			/* any rows left after the 16-row tiles? */
	ble	DSTRM_LT_L4x1_END		/* none: skip every remainder tile */

	andi.	T1, M, 8
	ble	DSTRM_LT_L4x8_END

	mr	BO, B

DSTRM_LT_L4x8_LOOP_START:

	INIT_8x4

	addic.	L, KK, 0			/* L = KK, sets CR0 */
	ble	DSTRM_LT_L4x8_SAVE

DSTRM_LT_L4x8_LOOP:

	KERNEL_8x4

	addic.	L, L, -1
	bgt	DSTRM_LT_L4x8_LOOP

DSTRM_LT_L4x8_SAVE:

	SOLVE_LT_8x4

	addi	CO, CO, 8*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 3+BASE_SHIFT
	slwi	T4, T4, 2+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 8

DSTRM_LT_L4x8_END:

/* ---------------- 4 x 4 tile ---------------- */

DSTRM_LT_L4x4_BEGIN:

	andi.	T1, M, 4
	ble	DSTRM_LT_L4x4_END

	mr	BO, B

DSTRM_LT_L4x4_LOOP_START:

	INIT_4x4

	addic.	L, KK, 0
	ble	DSTRM_LT_L4x4_SAVE

DSTRM_LT_L4x4_LOOP:

	KERNEL_4x4

	addic.	L, L, -1
	bgt	DSTRM_LT_L4x4_LOOP

DSTRM_LT_L4x4_SAVE:

	SOLVE_LT_4x4

	addi	CO, CO, 4*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 2+BASE_SHIFT
	slwi	T4, T4, 2+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 4

DSTRM_LT_L4x4_END:

/* ---------------- 2 x 4 tile ---------------- */

DSTRM_LT_L4x2_BEGIN:

	andi.	T1, M, 2
	ble	DSTRM_LT_L4x2_END

	mr	BO, B

DSTRM_LT_L4x2_LOOP_START:

	INIT_2x4

	addic.	L, KK, 0
	ble	DSTRM_LT_L4x2_SAVE

DSTRM_LT_L4x2_LOOP:

	KERNEL_2x4

	addic.	L, L, -1
	bgt	DSTRM_LT_L4x2_LOOP

DSTRM_LT_L4x2_SAVE:

	SOLVE_LT_2x4

	addi	CO, CO, 2*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 1+BASE_SHIFT
	slwi	T4, T4, 2+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 2

DSTRM_LT_L4x2_END:

/* ---------------- 1 x 4 tile ---------------- */

DSTRM_LT_L4x1_BEGIN:

	andi.	T1, M, 1
	ble	DSTRM_LT_L4x1_END

	mr	BO, B

DSTRM_LT_L4x1_LOOP_START:

	INIT_1x4

	addic.	L, KK, 0
	ble	DSTRM_LT_L4x1_SAVE

DSTRM_LT_L4x1_LOOP:

	KERNEL_1x4

	addic.	L, L, -1
	bgt	DSTRM_LT_L4x1_LOOP

DSTRM_LT_L4x1_SAVE:

	SOLVE_LT_1x4

	addi	CO, CO, 1*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 0+BASE_SHIFT
	slwi	T4, T4, 2+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 1

DSTRM_LT_L4x1_END:

	/* End of one 4-column panel: step B past K * 4 elements. */
	slwi	T1, K, 2+BASE_SHIFT
	add	B, B, T1

	addic.	J, J, -1
	bgt	DSTRM_LT_L4_BEGIN

	andi.	T2, N, 3			/* leftover columns (N mod 4)? */
	ble	L999				/* none: leave via L999 (defined elsewhere) */

DSTRM_LT_L4_END:

	b	DSTRM_LT_L2_BEGIN

	/* Trampoline to L999; nothing in this section branches to it. */
L999_H1:

	b	L999

/* ================================================================== */
/* Column panel of width 2 (taken when bit 1 of N is set)              */
/* ================================================================== */

DSTRM_LT_L2_BEGIN:

	andi.	T1, N, 2
	ble	DSTRM_LT_L2_END

	mr	CO, C
	mr	AO, A
	slwi	T1, LDC, 1			/* T1 = 2 * LDC */
	add	C, C, T1			/* C -> past this 2-column panel */

	mr	KK, OFFSET
	srawi.	I, M, 4				/* I = M / 16, sets CR0 */
	ble	DSTRM_LT_L2x16_END

/* ---------------- 16 x 2 tiles ---------------- */

DSTRM_LT_L2x16_BEGIN:

	mr	BO, B

DSTRM_LT_L2x16_LOOP_START:

	INIT_16x2

	addic.	L, KK, 0			/* L = KK, sets CR0 */
	ble	DSTRM_LT_L2x16_SAVE

DSTRM_LT_L2x16_LOOP:

	KERNEL_16x2

	addic.	L, L, -1
	bgt	DSTRM_LT_L2x16_LOOP

DSTRM_LT_L2x16_SAVE:

	SOLVE_LT_16x2

	addi	CO, CO, 16*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 4+BASE_SHIFT
	slwi	T4, T4, 1+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 16

	addic.	I, I, -1
	bgt	DSTRM_LT_L2x16_BEGIN

DSTRM_LT_L2x16_END:

/* ---------------- 8 x 2 tile ---------------- */

DSTRM_LT_L2x8_BEGIN:

	andi.	T2, M, 15			/* any rows left after the 16-row tiles? */
	ble	DSTRM_LT_L2x1_END		/* none: skip every remainder tile */

	andi.	T1, M, 8
	ble	DSTRM_LT_L2x8_END

	mr	BO, B

DSTRM_LT_L2x8_LOOP_START:

	INIT_8x2

	addic.	L, KK, 0
	ble	DSTRM_LT_L2x8_SAVE

DSTRM_LT_L2x8_LOOP:

	KERNEL_8x2

	addic.	L, L, -1
	bgt	DSTRM_LT_L2x8_LOOP

DSTRM_LT_L2x8_SAVE:

	SOLVE_LT_8x2

	addi	CO, CO, 8*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 3+BASE_SHIFT
	slwi	T4, T4, 1+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 8

DSTRM_LT_L2x8_END:

/* ---------------- 4 x 2 tile ---------------- */

DSTRM_LT_L2x4_BEGIN:

	andi.	T1, M, 4
	ble	DSTRM_LT_L2x4_END

	mr	BO, B

DSTRM_LT_L2x4_LOOP_START:

	INIT_4x2

	addic.	L, KK, 0
	ble	DSTRM_LT_L2x4_SAVE

DSTRM_LT_L2x4_LOOP:

	KERNEL_4x2

	addic.	L, L, -1
	bgt	DSTRM_LT_L2x4_LOOP

DSTRM_LT_L2x4_SAVE:

	SOLVE_LT_4x2

	addi	CO, CO, 4*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 2+BASE_SHIFT
	slwi	T4, T4, 1+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 4

DSTRM_LT_L2x4_END:

/* ---------------- 2 x 2 tile ---------------- */

DSTRM_LT_L2x2_BEGIN:

	andi.	T1, M, 2
	ble	DSTRM_LT_L2x2_END

	mr	BO, B

DSTRM_LT_L2x2_LOOP_START:

	INIT_2x2

	addic.	L, KK, 0
	ble	DSTRM_LT_L2x2_SAVE

DSTRM_LT_L2x2_LOOP:

	KERNEL_2x2

	addic.	L, L, -1
	bgt	DSTRM_LT_L2x2_LOOP

DSTRM_LT_L2x2_SAVE:

	SOLVE_LT_2x2

	addi	CO, CO, 2*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 1+BASE_SHIFT
	slwi	T4, T4, 1+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 2

DSTRM_LT_L2x2_END:

/* ---------------- 1 x 2 tile ---------------- */

DSTRM_LT_L2x1_BEGIN:

	andi.	T1, M, 1
	ble	DSTRM_LT_L2x1_END

	mr	BO, B

DSTRM_LT_L2x1_LOOP_START:

	INIT_1x2

	addic.	L, KK, 0
	ble	DSTRM_LT_L2x1_SAVE

DSTRM_LT_L2x1_LOOP:

	KERNEL_1x2

	addic.	L, L, -1
	bgt	DSTRM_LT_L2x1_LOOP

DSTRM_LT_L2x1_SAVE:

	SOLVE_LT_1x2

	addi	CO, CO, 1*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 0+BASE_SHIFT
	slwi	T4, T4, 1+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 1

DSTRM_LT_L2x1_END:

	/* End of the 2-column panel: step B past K * 2 elements. */
	slwi	T1, K, 1+BASE_SHIFT
	add	B, B, T1

DSTRM_LT_L2_END:

/* ================================================================== */
/* Column panel of width 1 (taken when bit 0 of N is set)              */
/* Unlike the wider panels, C and B are not advanced here, and there   */
/* is no combined "M & 15" early-out before the remainder tiles.       */
/* ================================================================== */

DSTRM_LT_L1_BEGIN:

	andi.	T1, N, 1
	ble	DSTRM_LT_L1_END

	mr	CO, C
	mr	AO, A

	mr	KK, OFFSET
	srawi.	I, M, 4				/* I = M / 16, sets CR0 */
	ble	DSTRM_LT_L1x16_END

/* ---------------- 16 x 1 tiles ---------------- */

DSTRM_LT_L1x16_BEGIN:

	mr	BO, B

DSTRM_LT_L1x16_LOOP_START:

	INIT_16x1

	addic.	L, KK, 0			/* L = KK, sets CR0 */
	ble	DSTRM_LT_L1x16_SAVE

DSTRM_LT_L1x16_LOOP:

	KERNEL_16x1

	addic.	L, L, -1
	bgt	DSTRM_LT_L1x16_LOOP

DSTRM_LT_L1x16_SAVE:

	SOLVE_LT_16x1

	addi	CO, CO, 16*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 4+BASE_SHIFT
	slwi	T4, T4, 0+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 16

	addic.	I, I, -1
	bgt	DSTRM_LT_L1x16_BEGIN

DSTRM_LT_L1x16_END:

/* ---------------- 8 x 1 tile ---------------- */

DSTRM_LT_L1x8_BEGIN:

	andi.	T1, M, 8
	ble	DSTRM_LT_L1x8_END

	mr	BO, B

DSTRM_LT_L1x8_LOOP_START:

	INIT_8x1

	addic.	L, KK, 0
	ble	DSTRM_LT_L1x8_SAVE

DSTRM_LT_L1x8_LOOP:

	KERNEL_8x1

	addic.	L, L, -1
	bgt	DSTRM_LT_L1x8_LOOP

DSTRM_LT_L1x8_SAVE:

	SOLVE_LT_8x1

	addi	CO, CO, 8*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 3+BASE_SHIFT
	slwi	T4, T4, 0+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 8

DSTRM_LT_L1x8_END:

/* ---------------- 4 x 1 tile ---------------- */

DSTRM_LT_L1x4_BEGIN:

	andi.	T1, M, 4
	ble	DSTRM_LT_L1x4_END

	mr	BO, B

DSTRM_LT_L1x4_LOOP_START:

	INIT_4x1

	addic.	L, KK, 0
	ble	DSTRM_LT_L1x4_SAVE

DSTRM_LT_L1x4_LOOP:

	KERNEL_4x1

	addic.	L, L, -1
	bgt	DSTRM_LT_L1x4_LOOP

DSTRM_LT_L1x4_SAVE:

	SOLVE_LT_4x1

	addi	CO, CO, 4*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 2+BASE_SHIFT
	slwi	T4, T4, 0+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 4

DSTRM_LT_L1x4_END:

/* ---------------- 2 x 1 tile ---------------- */

DSTRM_LT_L1x2_BEGIN:

	andi.	T1, M, 2
	ble	DSTRM_LT_L1x2_END

	mr	BO, B

DSTRM_LT_L1x2_LOOP_START:

	INIT_2x1

	addic.	L, KK, 0
	ble	DSTRM_LT_L1x2_SAVE

DSTRM_LT_L1x2_LOOP:

	KERNEL_2x1

	addic.	L, L, -1
	bgt	DSTRM_LT_L1x2_LOOP

DSTRM_LT_L1x2_SAVE:

	SOLVE_LT_2x1

	addi	CO, CO, 2*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 1+BASE_SHIFT
	slwi	T4, T4, 0+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 2

DSTRM_LT_L1x2_END:

/* ---------------- 1 x 1 tile ---------------- */

DSTRM_LT_L1x1_BEGIN:

	andi.	T1, M, 1
	ble	DSTRM_LT_L1x1_END

	mr	BO, B

DSTRM_LT_L1x1_LOOP_START:

	INIT_1x1

	addic.	L, KK, 0
	ble	DSTRM_LT_L1x1_SAVE

DSTRM_LT_L1x1_LOOP:

	KERNEL_1x1

	addic.	L, L, -1
	bgt	DSTRM_LT_L1x1_LOOP

DSTRM_LT_L1x1_SAVE:

	SOLVE_LT_1x1

	addi	CO, CO, 1*SIZE

	sub	T3, K, KK
	sub	T4, K, KK
	slwi	T3, T3, 0+BASE_SHIFT
	slwi	T4, T4, 0+BASE_SHIFT
	add	AO, AO, T3
	add	BO, BO, T4
	addi	KK, KK, 1

DSTRM_LT_L1x1_END:

DSTRM_LT_L1_END: