diff options
author | skal <pascal.massimino@gmail.com> | 2014-09-03 11:02:32 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-09-04 22:19:00 +0200 |
commit | 406a9ccffea8fc36c1e56c07ca32c559ccce52af (patch) | |
tree | 10c9fb8d62af811393a63a472ca7d286d2947b9a /libavfilter/x86/vf_idet.asm | |
parent | 53b089200557b299174bec72415b166ad43083a6 (diff) | |
download | ffmpeg-406a9ccffea8fc36c1e56c07ca32c559ccce52af.tar.gz |
avfilter/vf_idet: MMX/MMXEXT/SSE2 implementation of idet's filter_line()
integration by Neil Birkbeck, with help from Vitor Sessak.
core SSE2 loop by Skal (pascal.massimino@gmail.com)
Reviewed-by: Clément Bœsch <u@pkh.me>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavfilter/x86/vf_idet.asm')
-rw-r--r-- | libavfilter/x86/vf_idet.asm | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm new file mode 100644 index 0000000000..fb29804a0c --- /dev/null +++ b/libavfilter/x86/vf_idet.asm @@ -0,0 +1,114 @@ +; ***************************************************************************** +; * x86-optimized functions for idet filter +; * +; * This file is part of FFmpeg. +; * +; * FFmpeg is free software; you can redistribute it and/or modify +; * it under the terms of the GNU General Public License as published by +; * the Free Software Foundation; either version 2 of the License, or +; * (at your option) any later version. +; * +; * FFmpeg is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; * GNU General Public License for more details. +; * +; * You should have received a copy of the GNU General Public License along +; * with FFmpeg; if not, write to the Free Software Foundation, Inc., +; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +; ****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_TEXT + +%if ARCH_X86_32 + +; Implementation that does 8-bytes at a time using single-word operations. 
;------------------------------------------------------------------------------
; idet_filter_line(a, b, c, width) -- MMX / MMXEXT variants
;
; Accumulates |a[i] + c[i] - 2*b[i]| over the bytes of three lines, widening
; each byte to a 16-bit word first, and returns the total in eax.
;
; In:    a, b, c = pointers to the three byte lines (unaligned loads are used)
;        width   = number of bytes to process, compared as a signed 32-bit int
; Out:   eax     = low 32 bits of the accumulated sum
; Regs:  index   = byte offset into the lines, advanced by 8 per iteration
;        m2      = all-zero constant used for unpacking (m_zero)
;        m5      = two 32-bit partial sums (m_sum)
;        m0, m1, m3, m4 = scratch
;
; The loop body runs before the first width check and consumes 8 bytes per
; iteration with no tail handling.
; NOTE(review): presumably the caller guarantees width is a positive multiple
; of 8 -- confirm against the C wrapper.
; NOTE(review): no emms is issued before returning; presumably the caller is
; responsible for clearing the MMX state -- confirm.
;------------------------------------------------------------------------------
%macro IDET_FILTER_LINE 1
INIT_MMX %1
cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
    xor       indexd, indexd
%define   m_zero m2
%define   m_sum  m5
    pxor      m_sum, m_sum
    pxor      m_zero, m_zero

.loop:
    ; m1:m0 = a[index .. index+7] widened to words (high half : low half)
    movu      m0, [aq + indexq*1]
    punpckhbw m1, m0, m_zero
    punpcklbw m0, m_zero

    ; m4:m3 = c[index .. index+7] widened to words
    movu      m3, [cq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    ; m1:m0 = a + c  (at most 510 per word, so the saturating add never clips)
    paddsw    m1, m4
    paddsw    m0, m3

    ; m4:m3 = b[index .. index+7] widened to words
    movu      m3, [bq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    ; m1:m0 = a + c - 2*b  (stays within -510 .. 510)
    paddw     m4, m4
    paddw     m3, m3
    psubsw    m1, m4
    psubsw    m0, m3

    ; m1:m0 = |a + c - 2*b|   (m4, m3 are temporaries for ABS2)
    ABS2      m1, m0, m4, m3

    ; Fold the eight word results into two dwords and accumulate them.
    paddw     m0, m1
    punpckhwd m1, m0, m_zero
    punpcklwd m0, m_zero

    paddd     m0, m1
    paddd     m_sum, m0

    add       indexq, 0x8
    CMP       widthd, indexd
    jg        .loop

    ; Horizontal add of the two dword lanes of m_sum. Only the low dword is
    ; returned through movd, so a 32-bit lane add is sufficient. The 64-bit
    ; paddq form on MMX registers is an SSE2 instruction and therefore must
    ; not be used in the mmx/mmxext variants.
    mova      m0, m_sum
    psrlq     m_sum, 0x20
    paddd     m0, m_sum
    movd      eax, m0
    RET
%endmacro

IDET_FILTER_LINE mmxext
IDET_FILTER_LINE mmx
%endif ; ARCH_X86_32

;------------------------------------------------------------------------------
; idet_filter_line(a, b, c, width) -- SSE2 variant, 16 bytes per iteration
;
; Computes the same sum of |a[i] + c[i] - 2*b[i]| without widening to words:
; with the unsigned-saturated differences
;     ab = sat(a - b), ba = sat(b - a), bc = sat(b - c), cb = sat(c - b)
; we have a + c - 2*b = (ab - bc) - (ba - cb), and for every byte at least one
; of the two parenthesised terms is zero or they have opposite signs, so
;     |a + c - 2*b| = |ab - bc| + |ba - cb|.
; Each of the two terms is summed with psadbw.
;
; In:    a, b, c = pointers to the three byte lines (unaligned loads are used)
;        width   = number of bytes to process, compared as a signed 32-bit int
; Out:   eax     = low 32 bits of the accumulated sum
; Regs:  index   = byte offset into the lines, advanced by 16 per iteration
;        m0      = running sum of |ab - bc| (two 64-bit lanes)
;        m1      = running sum of |ba - cb| (two 64-bit lanes)
;        m2..m6  = scratch
;
; The loop body runs before the first width check and consumes 16 bytes per
; iteration with no tail handling.
; NOTE(review): presumably the caller guarantees width is a positive multiple
; of 16 -- confirm against the C wrapper.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal idet_filter_line, 4, 5, 7, a, b, c, width, index
    xor       indexd, indexd
    pxor      m0, m0
    pxor      m1, m1

.sse2_loop:
    movu      m2, [bq + indexq*1]  ; B
    movu      m3, [aq + indexq*1]  ; A
    mova      m6, m2
    mova      m4, m3
    psubusb   m5, m2, m3           ; ba

    movu      m3, [cq + indexq*1]  ; C
    add       indexq, 0x10
    psubusb   m4, m2               ; ab
    CMP       indexd, widthd       ; flags survive the SIMD ops below until jl

    psubusb   m6, m3               ; bc
    psubusb   m3, m2               ; cb

    psadbw    m4, m6               ; |ab - bc|
    paddq     m0, m4
    psadbw    m5, m3               ; |ba - cb|
    paddq     m1, m5
    jl        .sse2_loop

    ; Combine both accumulators, then fold the high 64-bit lane onto the low.
    paddq     m0, m1
    movhlps   m1, m0
    paddq     m0, m1
    movd      eax, m0
    RET