diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-08-26 22:36:13 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-08-26 22:40:02 +0200 |
commit | 17106a7c90902c7cddc5ad516234bd0e37d27732 (patch) | |
tree | 674b2b4bf6bba67f441c925511fc1cabe5bda74a /libavfilter/x86 | |
parent | 3a621c9d9929bcad3ae042c4c33308b4544c7cb1 (diff) | |
parent | d7f9786cbcd3fede7c751f1c1f481e55ee2380bd (diff) | |
download | ffmpeg-17106a7c90902c7cddc5ad516234bd0e37d27732.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
audio_frame_queue: Clean up ff_af_queue_log_state debug function
dwt: Remove unused code.
cavs: convert cavsdata.h to a .c file
cavs: Move inline functions only used in one file out of the header
cavs: Move data tables used in only one place to that file
fate: Add a single symbol Ut Video decoder test
vf_hqdn3d: x86 asm
vf_hqdn3d: support 16bit colordepth
avconv: prefer user-forced input framerate when choosing output framerate
Conflicts:
ffmpeg.c
libavcodec/audio_frame_queue.c
libavcodec/dwt.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- | libavfilter/x86/Makefile | 1 | ||||
-rw-r--r-- | libavfilter/x86/hqdn3d.asm | 106 |
2 files changed, 107 insertions, 0 deletions
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index e98693d654..46fc84f3ec 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -1,2 +1,3 @@ MMX-OBJS-$(CONFIG_YADIF_FILTER) += x86/yadif.o MMX-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/gradfun.o +YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/hqdn3d.o diff --git a/libavfilter/x86/hqdn3d.asm b/libavfilter/x86/hqdn3d.asm new file mode 100644 index 0000000000..88b9b0d034 --- /dev/null +++ b/libavfilter/x86/hqdn3d.asm @@ -0,0 +1,106 @@ +;****************************************************************************** +;* Copyright (c) 2012 Loren Merritt +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" + +SECTION .text + +%macro LOWPASS 3 ; prevsample, cursample, lut + sub %1q, %2q +%if lut_bits != 8 + sar %1q, 8-lut_bits +%endif + movsx %1d, word [%3q+%1q*2] + add %1d, %2d +%endmacro + +%macro LOAD 3 ; dstreg, x, bitdepth +%if %3 == 8 + movzx %1, byte [srcq+%2] +%else + movzx %1, word [srcq+(%2)*2] +%endif +%if %3 != 16 + shl %1, 16-%3 +%endif +%endmacro + +%macro HQDN3D_ROW 1 ; bitdepth +%if ARCH_X86_64 +cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1 +%else +cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal +%endif + %assign bytedepth (%1+7)>>3 + %assign lut_bits 4+4*(%1/16) + dec widthq + lea srcq, [srcq+widthq*bytedepth] + lea dstq, [dstq+widthq*bytedepth] + lea frameantq, [frameantq+widthq*2] + lea lineantq, [lineantq+widthq*2] + neg widthq + %define xq widthq +%if ARCH_X86_32 + mov dstmp, dstq + mov srcmp, srcq + mov frameantmp, frameantq + mov lineantmp, lineantq + %define dstq r0 + %define frameantq r0 + %define lineantq r0 + %define pixelantq r1 + %define pixelantd r1d + DECLARE_REG_TMP 2,3 +%endif + LOAD pixelantd, xq, %1 +ALIGN 16 +.loop: + movifnidn srcq, srcmp + LOAD t0d, xq+1, %1 ; skip on the last iteration to avoid overread +.loop2: + movifnidn lineantq, lineantmp + movzx t1d, word [lineantq+xq*2] + LOWPASS t1, pixelant, spatial + mov [lineantq+xq*2], t1w + LOWPASS pixelant, t0, spatial + movifnidn frameantq, frameantmp + movzx t0d, word [frameantq+xq*2] + LOWPASS t0, t1, temporal + mov [frameantq+xq*2], t0w + movifnidn dstq, dstmp +%if %1 != 16 + add t0d, (1<<(15-%1))-1 + shr t0d, 16-%1 ; could eliminate this by storing from t0h, 
but only with some constraints on register allocation +%endif +%if %1 == 8 + mov [dstq+xq], t0b +%else + mov [dstq+xq*2], t0w +%endif + inc xq + jl .loop + je .loop2 + REP_RET +%endmacro ; HQDN3D_ROW + +HQDN3D_ROW 8 +HQDN3D_ROW 9 +HQDN3D_ROW 10 +HQDN3D_ROW 16 |