diff options
author | Hendrik Leppkes <h.leppkes@gmail.com> | 2016-01-07 03:29:21 +0100 |
---|---|---|
committer | Hendrik Leppkes <h.leppkes@gmail.com> | 2016-01-08 11:56:43 +0100 |
commit | 53ada3af62d566bfd53dfc0a90b79cb91328615e (patch) | |
tree | a9f4c24ae514ea432a7379fa5c3158a98a2bb13b | |
parent | 08aec7c1bda4b5f084ff886e3a962278e25ed126 (diff) | |
download | ffmpeg-53ada3af62d566bfd53dfc0a90b79cb91328615e.tar.gz |
x86/vf_w3fdif: 32-bit compatibility for w3fdif_simple_high
-rw-r--r-- | libavfilter/x86/vf_w3fdif.asm | 35 | ||||
-rw-r--r-- | libavfilter/x86/vf_w3fdif_init.c | 2 |
2 files changed, 34 insertions, 3 deletions
```diff
diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
index c3c73eaba6..52628c38d7 100644
--- a/libavfilter/x86/vf_w3fdif.asm
+++ b/libavfilter/x86/vf_w3fdif.asm
@@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
     REP_RET
 
 %if ARCH_X86_64
-
 cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%else
+cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%endif
     movq m2, [coefq]
+%if ARCH_X86_64
     DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    xor offsetq, offsetq
+%else
+    DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    %define linesized r4mp
+%endif
+
     pshufd m0, m2, q0000
     SPLATW m2, m2, 2
     pxor m7, m7
-    mov offsetq, 0
     mov in_lines_cur2q, [in_lines_cur0q+gprsize*2]
     mov in_lines_cur1q, [in_lines_cur0q+gprsize]
     mov in_lines_cur0q, [in_lines_cur0q]
@@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     mov in_lines_adj1q, [in_lines_adj0q+gprsize]
     mov in_lines_adj0q, [in_lines_adj0q]
 
+%if ARCH_X86_32
+    sub in_lines_cur1q, in_lines_cur0q
+    sub in_lines_cur2q, in_lines_cur0q
+    sub in_lines_adj0q, in_lines_cur0q
+    sub in_lines_adj1q, in_lines_cur0q
+    sub in_lines_adj2q, in_lines_cur0q
+    %define offsetq in_lines_cur0q
+%endif
+
 .loop:
+%if ARCH_X86_64
     movh m3, [in_lines_cur0q+offsetq]
+%else
+    movh m3, [in_lines_cur0q]
+%endif
     movh m4, [in_lines_cur1q+offsetq]
     punpcklbw m3, m7
     punpcklbw m4, m7
@@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
     pmaddwd m6, m2
     paddd m3, m5
     paddd m4, m6
+%if ARCH_X86_64
     paddd m3, [work_lineq+offsetq*4]
     paddd m4, [work_lineq+offsetq*4+mmsize]
     mova [work_lineq+offsetq*4], m3
     mova [work_lineq+offsetq*4+mmsize], m4
+%else
+    paddd m3, [work_lineq]
+    paddd m4, [work_lineq+mmsize]
+    mova [work_lineq], m3
+    mova [work_lineq+mmsize], m4
+    add work_lineq, mmsize*2
+%endif
     add offsetq, mmsize/2
     sub linesized, mmsize/2
     jg .loop
     REP_RET
 
+%if ARCH_X86_64
+
 cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
     movq m0, [coefq+0]
     movd m4, [coefq+8]
diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c
index 72ea657c8f..9bf06e84a5 100644
--- a/libavfilter/x86/vf_w3fdif_init.c
+++ b/libavfilter/x86/vf_w3fdif_init.c
@@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->filter_simple_low = ff_w3fdif_simple_low_sse2;
+        dsp->filter_simple_high = ff_w3fdif_simple_high_sse2;
         dsp->filter_complex_low = ff_w3fdif_complex_low_sse2;
         dsp->filter_scale = ff_w3fdif_scale_sse2;
     }
 
     if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) {
-        dsp->filter_simple_high = ff_w3fdif_simple_high_sse2;
         dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
     }
 }
```