diff options
author | Martin Vignali <martin.vignali@gmail.com> | 2017-10-02 01:29:32 +0200 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-10-03 19:47:37 -0300 |
commit | cbbec68847ed3485900e83ec231871f71bb97d0d (patch) | |
tree | e720235468b4ae41c4e7dd1c2011d28a40a99b99 /libavcodec/x86 | |
parent | 4590d073ccdc7c3ce0384d5b1fba56b3f6673535 (diff) | |
download | ffmpeg-cbbec68847ed3485900e83ec231871f71bb97d0d.tar.gz |
libavcodec/blockdsp : add AVX version
Also modify the required alignment, to 32 instead of 16
for several codecs
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/blockdsp.asm | 14 | ||||
-rw-r--r-- | libavcodec/x86/blockdsp_init.c | 6 |
2 files changed, 14 insertions, 6 deletions
```diff
diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
index 7cbfa3a843..2498bd40b3 100644
--- a/libavcodec/x86/blockdsp.asm
+++ b/libavcodec/x86/blockdsp.asm
@@ -4,6 +4,8 @@
 ;* Copyright (c) 2008 Loren Merritt
 ;* Copyright (c) 2009 Fiona Glaser
 ;*
+;* AVX version by Jokyo Images
+;*
 ;* This file is part of FFmpeg.
 ;*
 ;* FFmpeg is free software; you can redistribute it and/or
@@ -39,20 +41,18 @@ cglobal clear_block, 1, 1, %1, blocks
     mova [blocksq+mmsize*(1+%%i)], m0
     mova [blocksq+mmsize*(2+%%i)], m0
     mova [blocksq+mmsize*(3+%%i)], m0
-    mova [blocksq+mmsize*(4+%%i)], m0
-    mova [blocksq+mmsize*(5+%%i)], m0
-    mova [blocksq+mmsize*(6+%%i)], m0
-    mova [blocksq+mmsize*(7+%%i)], m0
-%assign %%i %%i+8
+%assign %%i %%i+4
 %endrep
     RET
 %endmacro
 
 INIT_MMX mmx
 %define ZERO pxor
-CLEAR_BLOCK 0, 2
+CLEAR_BLOCK 0, 4
 INIT_XMM sse
 %define ZERO xorps
+CLEAR_BLOCK 1, 2
+INIT_YMM avx
 CLEAR_BLOCK 1, 1
 
 ;-----------------------------------------
@@ -84,3 +84,5 @@ CLEAR_BLOCKS 0
 INIT_XMM sse
 %define ZERO xorps
 CLEAR_BLOCKS 1
+INIT_YMM avx
+CLEAR_BLOCKS 1
diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c
index afd25e1cbb..8b01a447cd 100644
--- a/libavcodec/x86/blockdsp_init.c
+++ b/libavcodec/x86/blockdsp_init.c
@@ -28,8 +28,10 @@
 
 void ff_clear_block_mmx(int16_t *block);
 void ff_clear_block_sse(int16_t *block);
+void ff_clear_block_avx(int16_t *block);
 void ff_clear_blocks_mmx(int16_t *blocks);
 void ff_clear_blocks_sse(int16_t *blocks);
+void ff_clear_blocks_avx(int16_t *blocks);
 
 av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
                                   AVCodecContext *avctx)
@@ -50,5 +52,9 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
         c->clear_block = ff_clear_block_sse;
         c->clear_blocks = ff_clear_blocks_sse;
     }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        c->clear_block = ff_clear_block_avx;
+        c->clear_blocks = ff_clear_blocks_avx;
+    }
 #endif /* HAVE_X86ASM */
 }
```