// vim: set syntax=asm :

// Scalar operators: each include instantiates fma_mmm_ymm_scalar.tmpliq under
// the given label with the given packed-single instruction, for the register
// range from..to. scalar_sub_flipped reuses vsubps with the flipped flag set
// (presumably swapping the operand order -- confirm in the included template).
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_min", op:"vminps", from:from, to:to, type:type%}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_max", op:"vmaxps", from:from, to:to, type:type%}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_add", op:"vaddps", from:from, to:to, type:type%}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_mul", op:"vmulps", from:from, to:to, type:type%}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_sub", op:"vsubps", from:from, to:to, type:type%}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_sub_flipped", op:"vsubps", from:from, to:to, flipped: true, type:type%}

// Leaky ReLU applied in place to every register ymm<from>..ymm<to>:
//     x <- x            if 0 < x
//     x <- alpha * x    otherwise (including x == 0 and NaN)
// alpha is read from [rdi + 8].
{{L}}leaky_relu:
    // can only use ymm12 to ymm15 as scratch registers

    // ymm15 <- alpha, broadcast to all 8 lanes
    {% if type == "f32" %}
        vbroadcastss    ymm15, dword ptr [rdi + 8]
    {% else %}
        // alpha is stored as a 16-bit half-precision value: load it into word 0,
        // widen to single precision, then broadcast lane 0.
        // VEX-encoded vpinsrw (not legacy-SSE pinsrw) so that no SSE/AVX
        // transition penalty is paid while the upper ymm halves are in use.
        // The other words of xmm15 are don't-care: only lane 0 of the
        // conversion result is broadcast.
        vpinsrw         xmm15, xmm15, word ptr [rdi + 8], 0
        vcvtph2ps       ymm15, xmm15
        vbroadcastss    ymm15, xmm15
    {% endif %}

    // ymm14 <- all zero
    // vxorps rather than vpxor: same result, but the 256-bit form only needs
    // AVX (256-bit vpxor needs AVX2), and the only consumer is vcmpps.
    vxorps          ymm14, ymm14, ymm14

    {% for reg in (from..to) %}
        // ymm12 <- alpha * x
        vmulps      ymm12, ymm{{reg}}, ymm15
        // ymm13 <- per-lane mask, all ones where 0 < x
        vcmpps      ymm13, ymm14, ymm{{reg}}, 1 // 1 means LT
        // where the mask is set keep the original x, elsewhere take alpha * x
        vblendvps   ymm{{reg}}, ymm12, ymm{{reg}}, ymm13
    {% endfor %}

    jmp    {{L}}non_linear_loop

// Quantization operators are not implemented by this kernel: all three labels
// fall through to the same jump to the unsupported handler.
{{L}}q_scale:
{{L}}q_shl:
{{L}}q_shr:
    jmp {{L}}unsupported