diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-07-22 01:35:26 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-07-22 01:35:26 +0000 |
commit | 003243c3c2bdfa485eedbed593a0bb2feae66ab9 (patch) | |
tree | 05c3699e1257151ab1c8b02c3b3946e062d2df0f | |
parent | c7b1d9768c20ba5848f503f5258b4429fd303f3d (diff) | |
download | ffmpeg-003243c3c2bdfa485eedbed593a0bb2feae66ab9.tar.gz |
Fix and enable horizontal >=SSE2 mbedge loopfilter.
Originally committed as revision 24409 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/x86/vp8dsp-init.c | 12 | ||||
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 4 |
2 files changed, 8 insertions, 8 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c
index 8c0f415c6b..66ae884705 100644
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -343,16 +343,16 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
 
-        c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
-        c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
+        c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
+        c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
     }
 
     if (mm_flags & FF_MM_SSE2) {
         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
 
-        //c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
-        //c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
+        c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
+        c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
     }
 
     if (mm_flags & FF_MM_SSSE3) {
@@ -372,9 +372,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
 
         c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
-        //c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
+        c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
         c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
-        //c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
+        c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
     }
 
     if (mm_flags & FF_MM_SSE4) {
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index c3b6109128..c9d7d383fb 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -2513,8 +2513,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
 %else ; sse2 (h)
     lea dst8_reg, [dst8_reg+mstride_reg+1]
     WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4
-    add dst_reg, 4
-    add dst8_reg, 4
+    lea dst_reg, [dst2_reg+mstride_reg+4]
+    lea dst8_reg, [dst8_reg+mstride_reg+4]
     WRITE_8W m5, m5, dst2_reg, dst_reg, mstride_reg, stride_reg
     WRITE_8W m6, m6, dst2_reg, dst8_reg, mstride_reg, stride_reg
 %endif