aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorMans Rullgard <mans@mansr.com>2012-06-22 21:40:28 +0100
committerMans Rullgard <mans@mansr.com>2012-06-30 00:12:05 +0100
commitf2fd167835b6f039a593e46ab3a84e1b9a453660 (patch)
treeb1fab7a091df57227003fa639768e3d5dae3f231 /libavcodec/x86
parent3b1ab197be185b61247ef2472f15eeac3e765252 (diff)
downloadffmpeg-f2fd167835b6f039a593e46ab3a84e1b9a453660.tar.gz
x86: vc1: fix and enable optimised loop filter
The problem is that the ssse3 psign instruction does the wrong thing here. Commit ea60dfe incorrectly removed a macro emulating this instruction for pre-ssse3 code. However, the emulation is incorrect, and the code relies on the behaviour of the macro. Specifically, the psign sets destination elements to zero where the corresponding source element is zero, whereas the emulation only negates destination elements where the source is negative. Furthermore, the PSIGNW_MMX macro in x86util.asm is totally bogus, which is why the original VC-1 code had an additional right shift when using it. Since the psign instruction cannot be used here, skip all the macro hell and use the working instruction sequence directly. None of this was noticed due a stray return statement in ff_vc1dsp_init_mmx() which meant that only the mmx version of the loop filter was ever used (before being removed in ea60dfe). Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/vc1dsp_mmx.c2
-rw-r--r--libavcodec/x86/vc1dsp_yasm.asm5
2 files changed, 4 insertions, 3 deletions
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 4e996f1ce6..717f74f287 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -797,7 +797,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
if (mm_flags & AV_CPU_FLAG_MMX) {
dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
}
- return;
+
if (mm_flags & AV_CPU_FLAG_MMX2) {
ASSIGN_LF(mmx2);
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
diff --git a/libavcodec/x86/vc1dsp_yasm.asm b/libavcodec/x86/vc1dsp_yasm.asm
index 2c5cf22a0a..ced2b5ba88 100644
--- a/libavcodec/x86/vc1dsp_yasm.asm
+++ b/libavcodec/x86/vc1dsp_yasm.asm
@@ -119,7 +119,9 @@ section .text
pand m2, m6
pand m3, m2 ; d final
- PSIGNW m3, m7
+ psraw m7, 15
+ pxor m3, m7
+ psubw m3, m7
psubw m0, m3
paddw m1, m3
packuswb m0, m0
@@ -284,7 +286,6 @@ cglobal vc1_h_loop_filter8_sse2, 3,6,8
RET
%define PABSW PABSW_SSSE3
-%define PSIGNW PSIGNW_SSSE3
INIT_MMX
; void ff_vc1_v_loop_filter4_ssse3(uint8_t *src, int stride, int pq)