// vim: ft=arm

// Liquid template emitting AArch64 assembly for one per-column step.
//
// Template parameters read here: label, mr, from, to, op, flipped.
//   mr / 8      number of 8-lane (.8h) accumulator registers per column
//   from .. to  first and last accumulator register numbers, stored
//               column after column
//   op          mnemonic of the vector instruction applied to each accumulator
//   flipped     selects the operand order of op (see the loop below)
//
// Register usage in the emitted code: x2 is the operand pointer, v0 and up
// receive the per-column 16-bit operands, v3 holds the broadcast operand.
// NOTE(review): assumes from is above every register used for operands and
// above v3 - confirm against the kernels instantiating this template.

.{{label}}:
    // x2 <- 64-bit value stored at offset 8 from x0; it is used below as the
    // address of the per-column operands (one 16-bit value per column).
    ldr         x2, [x0, #8]

{% capture mr_over_8 %}{{ mr | divided_by: 8 }}{% endcapture %}
{% capture cols %}{{ to | plus: 1 | minus: from | divided_by: mr_over_8 }}{% endcapture %}
{% capture loads %}{{ cols | divided_by: 8 }}{% endcapture %}
{% capture tail %}{{ cols | modulo: 8 }}{% endcapture %}

// Load exactly cols 16-bit operands starting at x2 into v0 and up.
// Column counts 1, 3, 4 and 6 use dedicated short sequences. Every other
// count loads cols / 8 full 128-bit registers, then the remaining cols % 8
// values one lane at a time, so nothing past the last operand is read and
// every lane consumed by the dup below has been written.
{% if cols == "1" %}
    ld1         {v0.h}[0], [ x2 ]
{% elsif cols == "3" %}
    ld1         {v0.s}[0], [ x2 ], #4
    ld1         {v0.h}[2], [ x2 ]
{% elsif cols == "4" %}
    ldr         d0, [ x2 ]
{% elsif cols == "6" %}
    ld1         {v0.d}[0], [ x2 ], #8
    ld1         {v0.s}[2], [ x2 ]
{% else %}
    {% if loads != "0" %}
        {% for reg in (1..loads) %}
    ldr         q{{ reg | minus: 1 }}, [ x2 ], #16
        {% endfor %}
    {% endif %}
    {% if tail != "0" %}
        {% for lane in (1..tail) %}
    ld1         { v{{ loads }}.h }[{{ lane | minus: 1 }}], [ x2 ], #2
        {% endfor %}
    {% endif %}
{% endif %}

// mr:{{mr}} {{ loads }} {{cols}}

// For each column: broadcast its operand to all eight lanes of v3, then apply
// op to each of the mr / 8 accumulators of that column.
//   flipped set:    acc <- op(acc, v3)
//   flipped unset:  acc <- op(v3, acc)
// Accumulator index: from + (col - 1) * (mr / 8) + (row - 1).
{% for col in (1..cols) %}
    dup         v3.8h, v{{ col | minus: 1 | divided_by: 8 }}.h[{{ col | minus: 1 | modulo: 8 }}]
    {% for row in (1..mr_over_8) %}
        {% capture acc %}{{ col | minus: 1 | times: mr_over_8 | plus: row | minus: 1 | plus: from }}{% endcapture %}
        {% if flipped %}
    {{op}}      v{{acc}}.8h, v{{acc}}.8h, v3.8h
        {% else %}
    {{op}}      v{{acc}}.8h, v3.8h, v{{acc}}.8h
        {% endif %}
    {% endfor %}
{% endfor %}

    // Jump to the .non_linear_loop label, which is defined outside this template.
    b           .non_linear_loop