// vim: ft=arm

// Applies a three-operand instruction between each accumulator register and
// a 32-bit value selected per column, then jumps back to .non_linear_loop.
//
// Template parameters:
//   label    name of the local label emitted in front of this code
//   mr       each column spans mr/4 consecutive q registers
//   from     index of the first accumulator q register
//   to       index of the last accumulator q register
//   op       instruction mnemonic applied to every accumulator
//   flipped  when set, op gets (accumulator, q3) as sources,
//            otherwise it gets (q3, accumulator)
//
// Register usage:
//   r3       address of the per-column values (advanced by the vldmia loads,
//            left untouched by the single-lane load)
//   d0..     receive the per-column values, two 32-bit lanes per d register
//   q3       scratch, holds the current column value broadcast to all lanes
//
// NOTE(review): the multi-column path loads cols/2 d registers, rounded down,
// so it presumably expects cols to be 1 or even -- confirm against callers.

.{{label}}:
    {% capture mr_over_4 %}{{ mr | divided_by: 4}}{%endcapture%}
    {% capture mr_over_4_min_1 %}{{ mr | divided_by: 4 | minus: 1}}{%endcapture%}
    {%capture cols%}{{to | plus: 1| minus:from| divided_by:mr_over_4}}{%endcapture%}
    {%capture cols_min_1%}{{to | plus: 1| minus:from| divided_by:mr_over_4|minus:1}}{%endcapture%}

    // Fetch one 32-bit value per column, starting at d0.
    {% if cols == "1" %}
        vld1.f32        d0[0], [ r3 ]
    {% else %}
        {%capture cols_over_2_minus_1%}{{cols | divided_by:2 | minus:1}}{%endcapture%}
        {% for c in (0..cols_over_2_minus_1) %}
            vldmia          r3!, { d{{c}} }
        {% endfor %}
    {% endif %}

    // For each column: broadcast its value into q3, then combine it with the
    // mr/4 accumulator registers that make up that column.
    {% for right in (0..cols_min_1) %}
        vdup.f32 q3, d{{right|divided_by:2}}[{{right| modulo:2}}]
        {% for down in (0..mr_over_4_min_1) %}
            {%capture acc%}{{mr_over_4|times:right|plus:from|plus:down}}{%endcapture%}
            {% if flipped %}
                {{op}} q{{acc}}, q{{acc}}, q3
            {% else %}
                {{op}} q{{acc}}, q3, q{{acc}}
            {% endif %}
        {% endfor %}
    {% endfor %}

    b .non_linear_loop