// vim: set syntax=asm :

// Element-wise i32 scalar operators. Each include instantiates the shared
// scalar template once, binding a label to the packed-dword instruction
// that implements it over registers ymm{{from}}..ymm{{to}}.
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_min", op:"vpminsd", from:from, to:to, type:"i32" %}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_max", op:"vpmaxsd", from:from, to:to, type:"i32" %}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_mul", op:"vpmulld", from:from, to:to, type:"i32" %}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_add", op:"vpaddd", from:from, to:to, type:"i32" %}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_sub", op:"vpsubd", from:from, to:to, type:"i32" %}
{% include "fma_mmm_ymm_scalar.tmpliq" label:"scalar_sub_flipped", op:"vpsubd", from:from, to:to, flipped: true, type:"i32" %}

// Leaky ReLU over packed signed dwords: every lane x of
// ymm{{from}}..ymm{{to}} that is strictly negative is replaced by the low
// 32 bits of alpha * x; all other lanes are left as they are.
{{L}}leaky_relu:
    // scratch space is limited to ymm12..ymm15
    // ymm14 <- 0 in every lane (right-hand side of the sign test)
    vpxor           ymm14, ymm14, ymm14
    // ymm15 <- alpha: the dword at [rdi + 8], replicated into every lane
    vbroadcastss    ymm15, dword ptr [rdi + 8]

{% for acc in (from..to) %}
    // ymm13 <- all-ones in the lanes where 0 > x, zero elsewhere
    vpcmpgtd        ymm13, ymm14, ymm{{acc}}
    // ymm12 <- alpha * x (low dword of each product)
    vpmulld         ymm12, ymm15, ymm{{acc}}
    // take the scaled lane where the mask sign bit is set, else keep x
    vblendvps       ymm{{acc}}, ymm{{acc}}, ymm12, ymm13
{% endfor %}

    jmp {{L}}non_linear_loop