// vim: ft=arm

// Element-wise stages operating in place on the vector registers
// v<from>..v<to> (inclusive). `from` and `to` are not defined in this file:
// they must be supplied by the template that includes it.
//
// Every stage ends by branching to a label defined outside this file
// (.non_linear_loop or .unsupported).

// Scalar stages: one instantiation of arm64simd_mmm_4s_scalar.tmpliq per
// operation, each given its own label and the NEON instruction to apply.
// scalar_sub and scalar_sub_flipped both use fsub; the latter passes
// flipped:true (presumably swapping the operand order -- confirm in the
// included template).
{% include "arm64simd_mmm_4s_scalar.tmpliq" label:"scalar_min", op:"fmin", from:from, to:to %}
{% include "arm64simd_mmm_4s_scalar.tmpliq" label:"scalar_max", op:"fmax", from:from, to:to %}
{% include "arm64simd_mmm_4s_scalar.tmpliq" label:"scalar_mul", op:"fmul", from:from, to:to %}
{% include "arm64simd_mmm_4s_scalar.tmpliq" label:"scalar_add", op:"fadd", from:from, to:to %}
{% include "arm64simd_mmm_4s_scalar.tmpliq" label:"scalar_sub", op:"fsub", from:from, to:to %}
{% include "arm64simd_mmm_4s_scalar.tmpliq" label:"scalar_sub_flipped", op:"fsub", from:from, to:to, flipped:true %}

// Zero every register in the range. The eor is written on the 64-bit (.8b)
// arrangement; an AArch64 write to the 64-bit vector form also zeroes the
// upper 64 bits, so the full 128-bit register ends up cleared.
.clear:
{% for r in (from..to) %}
    eor         v{{r}}.8b, v{{r}}.8b, v{{r}}.8b
{% endfor %}
    b           .non_linear_loop

// Leaky ReLU: x >= 0 ? x : x * slope, lane-wise on 4 x f32 per register.
//
// The slope is a single 32-bit value read from [x0 + 8] and broadcast to all
// four lanes of v4 (presumably x0 points at the current stage descriptor and
// the parameter sits 8 bytes in -- confirm against the dispatcher).
//
// Scratch registers: x2, v0 (scaled value), v1 (comparison mask), v4 (slope).
// The <from>..<to> range must therefore not contain 0, 1 or 4.
//
// Per register:
//   fmul  v0 = x * slope
//   fcmge v1 = all-ones in lanes where x >= 0.0, zero elsewhere
//   bif   insert v0 into x wherever the v1 mask bit is 0, i.e. keep x in the
//         non-negative lanes and take x * slope in the others.
// bif selects directly into the destination, so no separate select-then-copy
// pair is needed.
.leaky_relu:
    add         x2, x0, #8
    ld1         {v4.s}[0], [ x2 ]
    dup         v4.4s, v4.s[0]
{% for r in (from..to) %}
    fmul        v0.4s, v{{r}}.4s, v4.4s
    fcmge       v1.4s, v{{r}}.4s, #0.0
    bif         v{{r}}.16b, v0.16b, v1.16b
{% endfor %}
    b           .non_linear_loop

// The q_scale / q_shl / q_shr stages have no implementation here: all three
// labels fall through to a single branch to .unsupported.
.q_scale:
.q_shl:
.q_shr:
    b           .unsupported