about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Hendrik Leppkes <h.leppkes@gmail.com> 2016-01-07 03:29:21 +0100
committer: Hendrik Leppkes <h.leppkes@gmail.com> 2016-01-08 11:56:43 +0100
commit: 53ada3af62d566bfd53dfc0a90b79cb91328615e (patch)
tree: a9f4c24ae514ea432a7379fa5c3158a98a2bb13b
parent: 08aec7c1bda4b5f084ff886e3a962278e25ed126 (diff)
download: ffmpeg-53ada3af62d566bfd53dfc0a90b79cb91328615e.tar.gz
x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high
-rw-r--r-- libavfilter/x86/vf_w3fdif.asm 35
-rw-r--r-- libavfilter/x86/vf_w3fdif_init.c 2
2 files changed, 34 insertions, 3 deletions
diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
index c3c73eaba6..52628c38d7 100644
--- a/libavfilter/x86/vf_w3fdif.asm
+++ b/libavfilter/x86/vf_w3fdif.asm
@@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
REP_RET
%if ARCH_X86_64
-
cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%else
+cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%endif
movq m2, [coefq]
+%if ARCH_X86_64
DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
+ xor offsetq, offsetq
+%else
+ DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
+ %define linesized r4mp
+%endif
+
pshufd m0, m2, q0000
SPLATW m2, m2, 2
pxor m7, m7
- mov offsetq, 0
mov in_lines_cur2q, [in_lines_cur0q+gprsize*2]
mov in_lines_cur1q, [in_lines_cur0q+gprsize]
mov in_lines_cur0q, [in_lines_cur0q]
@@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
mov in_lines_adj1q, [in_lines_adj0q+gprsize]
mov in_lines_adj0q, [in_lines_adj0q]
+%if ARCH_X86_32
+ sub in_lines_cur1q, in_lines_cur0q
+ sub in_lines_cur2q, in_lines_cur0q
+ sub in_lines_adj0q, in_lines_cur0q
+ sub in_lines_adj1q, in_lines_cur0q
+ sub in_lines_adj2q, in_lines_cur0q
+ %define offsetq in_lines_cur0q
+%endif
+
.loop:
+%if ARCH_X86_64
movh m3, [in_lines_cur0q+offsetq]
+%else
+ movh m3, [in_lines_cur0q]
+%endif
movh m4, [in_lines_cur1q+offsetq]
punpcklbw m3, m7
punpcklbw m4, m7
@@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
pmaddwd m6, m2
paddd m3, m5
paddd m4, m6
+%if ARCH_X86_64
paddd m3, [work_lineq+offsetq*4]
paddd m4, [work_lineq+offsetq*4+mmsize]
mova [work_lineq+offsetq*4], m3
mova [work_lineq+offsetq*4+mmsize], m4
+%else
+ paddd m3, [work_lineq]
+ paddd m4, [work_lineq+mmsize]
+ mova [work_lineq], m3
+ mova [work_lineq+mmsize], m4
+ add work_lineq, mmsize*2
+%endif
add offsetq, mmsize/2
sub linesized, mmsize/2
jg .loop
REP_RET
+%if ARCH_X86_64
+
cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
movq m0, [coefq+0]
movd m4, [coefq+8]
diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c
index 72ea657c8f..9bf06e84a5 100644
--- a/libavfilter/x86/vf_w3fdif_init.c
+++ b/libavfilter/x86/vf_w3fdif_init.c
@@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
if (EXTERNAL_SSE2(cpu_flags)) {
dsp->filter_simple_low = ff_w3fdif_simple_low_sse2;
+ dsp->filter_simple_high = ff_w3fdif_simple_high_sse2;
dsp->filter_complex_low = ff_w3fdif_complex_low_sse2;
dsp->filter_scale = ff_w3fdif_scale_sse2;
}
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
- dsp->filter_simple_high = ff_w3fdif_simple_high_sse2;
dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
}
}