diff options
author | James Almer <jamrial@gmail.com> | 2014-08-19 23:52:05 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2014-08-20 16:54:52 -0300 |
commit | 76a99d467fff3faf826c46ccde66bd1303876a0d (patch) | |
tree | 0972e77ace6de2660a7321a9b795fd02d3b187f5 /libavcodec/x86/hevc_res_add.asm | |
parent | d2163f5e2836090dc7827f43bef921dbe364ec8f (diff) | |
download | ffmpeg-76a99d467fff3faf826c46ccde66bd1303876a0d.tar.gz |
x86/hevc_res_add: add ff_hevc_transform_add{8,16,32}_8_avx
~15% faster than sse2
Reviewed-by: Mickaël Raulet <mraulet@gmail.com>
Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/hevc_res_add.asm')
-rw-r--r-- | libavcodec/x86/hevc_res_add.asm | 15 |
1 file changed, 11 insertions, 4 deletions
```diff
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm
index 47022d3610..feea50c67b 100644
--- a/libavcodec/x86/hevc_res_add.asm
+++ b/libavcodec/x86/hevc_res_add.asm
@@ -156,8 +156,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6
 %endmacro
 
 
-INIT_XMM sse2
-; void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+%macro TRANSFORM_ADD_8 0
+; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
 cglobal hevc_transform_add8_8, 3, 4, 8
     lea r3, [r2*3]
     TR_ADD_SSE_8_8
@@ -167,7 +167,7 @@ cglobal hevc_transform_add8_8, 3, 4, 8
     RET
 
 %if ARCH_X86_64
-; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
 cglobal hevc_transform_add16_8, 3, 4, 12
     lea r3, [r2*3]
     TR_ADD_SSE_16_8
@@ -178,7 +178,7 @@ cglobal hevc_transform_add16_8, 3, 4, 12
 %endrep
     RET
 
-; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
 cglobal hevc_transform_add32_8, 3, 4, 12
 
     TR_ADD_SSE_32_8
@@ -190,6 +190,13 @@ cglobal hevc_transform_add32_8, 3, 4, 12
     RET
 %endif ;ARCH_X86_64
 
+%endmacro
+
+INIT_XMM sse2
+TRANSFORM_ADD_8
+INIT_XMM avx
+TRANSFORM_ADD_8
+
 ;-----------------------------------------------------------------------------
 ; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
```