// vim: set syntax=asm :

// Element-wise "scalar" handlers. Each one instantiates zmm_scalar.tmpliq
// with a label name and an AVX-512 packed-single instruction, forwarding the
// from/to register range of the enclosing template unchanged.
// NOTE(review): the body of zmm_scalar.tmpliq is not visible here; presumably
// it applies `op` between a broadcast scalar and every zmm register in
// from..to -- confirm against that template.
{% include "zmm_scalar.tmpliq" label:"scalar_min", op:"vminps", from:from, to:to %}
{% include "zmm_scalar.tmpliq" label:"scalar_max", op:"vmaxps", from:from, to:to %}
{% include "zmm_scalar.tmpliq" label:"scalar_add", op:"vaddps", from:from, to:to %}
{% include "zmm_scalar.tmpliq" label:"scalar_mul", op:"vmulps", from:from, to:to %}
{% include "zmm_scalar.tmpliq" label:"scalar_sub", op:"vsubps", from:from, to:to %}
// Same instruction as scalar_sub, but with the `flipped` flag set.
// NOTE(review): presumably this swaps the operand order -- confirm in
// zmm_scalar.tmpliq.
{% include "zmm_scalar.tmpliq" label:"scalar_sub_flipped", op:"vsubps", from:from, to:to, flipped: true %}

// Leaky ReLU over every register in from..to:
//   x <- alpha * x   for lanes where x < 0
//   x <- x           for all other lanes
{{L}}leaky_relu:
    // can only use zmm12 to zmm15
    // zmm15 <- alpha, the 32-bit float at [rdi + 8] broadcast to all lanes
    // NOTE(review): presumably rdi points at the current operation
    // descriptor -- confirm with the caller.
    vbroadcastss zmm15, dword ptr [rdi + 8]
    // zmm14 <- all zero
    vpxorq zmm14, zmm14, zmm14

    {% for reg in (from..to) %}
        // k1 <- per-lane mask of (zmm<reg> < 0); predicate 1 means LT
        vcmpps k1, zmm{{reg}}, zmm14, 1 // 1 means LT
        // zmm<reg> <- alpha * x in the lanes selected by k1. This is
        // merge-masking (no {z}), so unselected lanes keep their value and
        // no separate blend/select step is needed.
        vmulps zmm{{reg}} {k1}, zmm{{reg}}, zmm15
    {% endfor %}

    // back to the dispatch loop
    jmp {{L}}non_linear_loop

// Quantization handlers have no implementation in this template: all three
// labels fall through to the same jump to the `unsupported` label.
{{L}}q_scale:
{{L}}q_shl:
{{L}}q_shr:
    jmp {{L}}unsupported