aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/constants.c
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2015-02-04 20:23:19 -0300
committerJames Almer <jamrial@gmail.com>2015-02-05 15:02:33 -0300
commit15574c505b81d6e92370096eaca6764c13b5fbfc (patch)
tree224fce351260fb2a9fd6e1a656d6245998b26caf /libavcodec/x86/constants.c
parent042c1159fcf8431725c718a504b1fb40364bf434 (diff)
downloadffmpeg-15574c505b81d6e92370096eaca6764c13b5fbfc.tar.gz
x86/hevcdsp: add ff_hevc_sao_edge_filter_{10,12}_{sse2,avx2}
Original x86 intrinsics code by Pierre-Edouard Lepere. Yasm port, refactoring and optimizations by James Almer. Benchmarks of BQTerrace_1920x1080_60_qp22.bin with an Intel Core i5-4200U Width 32 342694 decicycles in sao_edge_filter_10, 16384 runs, 0 skips 29476 decicycles in ff_hevc_sao_edge_filter_32_10_ssse3, 16384 runs, 0 skips 13996 decicycles in ff_hevc_sao_edge_filter_32_10_avx2, 16381 runs, 3 skips Width 64 581163 decicycles in sao_edge_filter_10, 8192 runs, 0 skips 59774 decicycles in ff_hevc_sao_edge_filter_64_10_ssse3, 8192 runs, 0 skips 28383 decicycles in ff_hevc_sao_edge_filter_64_10_avx2, 8191 runs, 1 skips Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/constants.c')
-rw-r--r--libavcodec/x86/constants.c9
1 files changed, 6 insertions, 3 deletions
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index 54acc891c5..a7cb75d456 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -22,8 +22,10 @@
#include "libavutil/x86/asm.h" // for xmm_reg
#include "constants.h"
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL,
+ 0x0001000100010001ULL, 0x0001000100010001ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL,
+ 0x0002000200020002ULL, 0x0002000200020002ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL };
@@ -48,7 +50,8 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03F
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
+ 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL,