// Tile size: 8x1
// Accumulators: 0-7
// Col regs: 8-14
// Row regs: 15
//
// One accumulation step of the tile (straight-line fragment; no label,
// branch, or register setup is visible here):
//
//     ymm{i} += load256([rax + 32*i]) * broadcast(float32 at [rcx]),  i = 0..7
//
// In:    rax     -> 256 bytes (8 vectors of 8 floats); vmovaps faults unless
//                   the address is 32-byte aligned
//        rcx     -> one 32-bit float
//        ymm0-7   = running accumulators
// Out:   ymm0-7 updated, rax += 256, rcx += 4
// Clobb: ymm8-ymm15, flags (left by the final add)
//
// NOTE(review): presumably one k-iteration of a matrix-multiply micro-kernel;
// confirm against whatever emits this fragment.
// NOTE(review): loads are interleaved with the FMAs, which looks like a
// deliberate schedule. Instruction order is kept as-is; measure before
// regrouping.

        vbroadcastss    ymm15, dword ptr [rcx]          // ymm15 = scalar splat to all 8 lanes

        vmovaps         ymm8,  [rax]                    // column vectors 0-3
        vmovaps         ymm9,  [rax + 32]
        vmovaps         ymm10, [rax + 64]
        vmovaps         ymm11, [rax + 96]

        vfmadd231ps     ymm0, ymm8,  ymm15              // acc0 += col0 * scalar
        vfmadd231ps     ymm1, ymm9,  ymm15              // acc1 += col1 * scalar

        vmovaps         ymm12, [rax + 128]              // column vectors 4-5
        vmovaps         ymm13, [rax + 160]

        vfmadd231ps     ymm2, ymm10, ymm15              // acc2 += col2 * scalar
        vfmadd231ps     ymm3, ymm11, ymm15              // acc3 += col3 * scalar; ymm11 is free after this

        vmovaps         ymm14, [rax + 192]              // column vector 6
        vmovaps         ymm11, [rax + 224]              // column vector 7 reuses ymm11 (only 8-14 are col regs)

        vfmadd231ps     ymm4, ymm12, ymm15              // acc4 += col4 * scalar
        vfmadd231ps     ymm5, ymm13, ymm15              // acc5 += col5 * scalar
        vfmadd231ps     ymm6, ymm14, ymm15              // acc6 += col6 * scalar
        vfmadd231ps     ymm7, ymm11, ymm15              // acc7 += col7 * scalar

        add             rcx, 4                          // step past the float just broadcast
        add             rax, 256                        // step past the 8 x 32 bytes just loaded