diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-21 22:11:03 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-21 22:11:03 +0000 |
commit | 7dd224a42daf32b2ed69a1cd9aa61d35c8a0d16e (patch) | |
tree | 7df6b7966c6408c9750217c0826a072000a6bd2b /libavcodec/x86/vp8dsp-init.c | |
parent | 8a810ccbba3b4d58ed27251f68ac88a0cb40b227 (diff) | |
download | ffmpeg-7dd224a42daf32b2ed69a1cd9aa61d35c8a0d16e.tar.gz |
Various VP8 x86 deblocking speedups
Add SSSE3 versions and improve the SSE2 versions a bit.
The SSE2/SSSE3 mbedge h functions are currently broken, so they are explicitly disabled.
Originally committed as revision 24403 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/vp8dsp-init.c')
-rw-r--r-- | libavcodec/x86/vp8dsp-init.c | 100 |
1 files changed, 40 insertions, 60 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c index a7b1ce0de7..8c0f415c6b 100644 --- a/libavcodec/x86/vp8dsp-init.c +++ b/libavcodec/x86/vp8dsp-init.c @@ -223,64 +223,31 @@ extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride) extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]); extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride); -extern void ff_vp8_v_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); -extern void ff_vp8_v_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); -extern void ff_vp8_v_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); -extern void ff_vp8_h_loop_filter_simple_mmx (uint8_t *dst, int stride, int flim); -extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim); -extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim); - -extern void ff_vp8_v_loop_filter16y_inner_mmx (uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_v_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_v_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_h_loop_filter16y_inner_mmx (uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride, - int e, int i, int hvt); - -extern void ff_vp8_v_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_v_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_v_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_h_loop_filter8uv_inner_mmx (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); 
-extern void ff_vp8_h_loop_filter8uv_inner_mmxext(uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_h_loop_filter8uv_inner_sse2 (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); - -extern void ff_vp8_v_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_v_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_v_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_h_loop_filter16y_mbedge_mmx (uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_h_loop_filter16y_mbedge_mmxext(uint8_t *dst, int stride, - int e, int i, int hvt); -extern void ff_vp8_h_loop_filter16y_mbedge_sse2 (uint8_t *dst, int stride, - int e, int i, int hvt); - -extern void ff_vp8_v_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_v_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_v_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_h_loop_filter8uv_mbedge_mmx (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_h_loop_filter8uv_mbedge_mmxext(uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); -extern void ff_vp8_h_loop_filter8uv_mbedge_sse2 (uint8_t *dstU, uint8_t *dstV, - int s, int e, int i, int hvt); +#define DECLARE_LOOP_FILTER(NAME)\ +extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\ +extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\ +extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\ + int e, int i, int hvt);\ +extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\ + int e, int i, int hvt);\ +extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME 
(uint8_t *dstU, uint8_t *dstV,\ + int s, int e, int i, int hvt);\ +extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\ + int s, int e, int i, int hvt);\ +extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\ + int e, int i, int hvt);\ +extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\ + int e, int i, int hvt);\ +extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\ + int s, int e, int i, int hvt);\ +extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\ + int s, int e, int i, int hvt); + +DECLARE_LOOP_FILTER(mmx) +DECLARE_LOOP_FILTER(mmxext) +DECLARE_LOOP_FILTER(sse2) +DECLARE_LOOP_FILTER(ssse3) + #endif #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ @@ -384,8 +351,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; - c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; - c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; + //c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; + //c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; } if (mm_flags & FF_MM_SSSE3) { @@ -395,6 +362,19 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) VP8_BILINEAR_MC_FUNC(0, 16, ssse3); VP8_BILINEAR_MC_FUNC(1, 8, ssse3); VP8_BILINEAR_MC_FUNC(2, 4, ssse3); + + c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; + c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; + + c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3; + c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3; + c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3; + c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3; + + c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3; + //c->vp8_h_loop_filter16y = 
ff_vp8_h_loop_filter16y_mbedge_ssse3; + c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; + //c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; } if (mm_flags & FF_MM_SSE4) { |