// vim: ft=arm

// Per-column operation on a tile of accumulator registers.
//
// Template inputs: label, op, mr, from, to, flipped.
//   label    : name of the emitted local label
//   op       : three-operand vector instruction applied to every accumulator
//   mr       : tile row count; each column spans mr/4 consecutive registers
//   from, to : inclusive range of accumulator vector registers
//   flipped  : selects the operand order (see the inner loop below)
//
// The per-column values are read as 32-bit lanes from the pointer found at
// offset 8 of the structure addressed by x0. They are staged in v0, v1, ...
// and broadcast one at a time through v3. Consequently this template assumes
// at most 12 columns and accumulators that do not overlap v0-v3; neither
// condition is checked here.

.{{label}}:
    ldr         x2, [x0, #8]

{% capture mr_over_4 %}{{ mr | divided_by: 4 }}{% endcapture %}
{% capture cols %}{{ to | plus: 1 | minus: from | divided_by: mr_over_4 }}{% endcapture %}
{% capture loads %}{{ cols | divided_by: 4 }}{% endcapture %}
{% capture tail_cols %}{{ cols | modulo: 4 }}{% endcapture %}

{% if cols == "1" %}
    // single column: load exactly one 32-bit value into lane 0 of v0
    ld1         {v0.s}[0], [ x2 ]
{% else %}
    // full groups of four columns: one 128-bit load per staging register
    {% for reg in (1..loads) %}
    ldr         q{{ reg | minus: 1 }}, [ x2 ], #16
    {% endfor %}
    {% if tail_cols != "0" %}
    // leftover columns (count not a multiple of 4): load lane by lane so that
    // no more than the declared number of values is ever read from memory
    {% for lane in (1..tail_cols) %}
    ld1         {v{{ loads }}.s}[{{ lane | minus: 1 }}], [ x2 ], #4
    {% endfor %}
    {% endif %}
{% endif %}

// {{mr}} {{cols}}

{% for col in (1..cols) %}
    // broadcast the value of this column to all four lanes of v3
    dup         v3.4s, v{{ col | minus: 1 | divided_by: 4 }}.s[{{ col | minus: 1 | modulo: 4 }}]
    {% for row in (1..mr_over_4) %}
        {% capture acc %}{{ col | minus: 1 | times: mr_over_4 | plus: row | minus: 1 | plus: from }}{% endcapture %}
        {% if flipped %}
    // flipped: accumulator is the first source operand
    {{op}}      v{{acc}}.4s, v{{acc}}.4s, v3.4s
        {% else %}
    // default: broadcast value is the first source operand
    {{op}}      v{{acc}}.4s, v3.4s, v{{acc}}.4s
        {% endif %}
    {% endfor %}
{% endfor %}

    b           .non_linear_loop