diff options
author | Christophe GISQUET <christophe.gisquet@gmail.com> | 2012-01-01 18:33:22 +0100 |
---|---|---|
committer | Kostya Shishkov <kostya.shishkov@gmail.com> | 2012-01-12 09:52:33 +0100 |
commit | 3faa303a47e0c3b59a53988e0f76018930c6cb1a (patch) | |
tree | 7e1f4192c2d6ddab81cb80c71deb1586b11e16cf /libavcodec/x86/rv34dsp.asm | |
parent | b2ce3b998b90c9ec8dcefe4b2c45fcf5b2f0a903 (diff) | |
download | ffmpeg-3faa303a47e0c3b59a53988e0f76018930c6cb1a.tar.gz |
rv34: DC-only inverse transform
When decoding coefficients, detect whether the block is DC-only, and take
advantage of this knowledge to perform DC-only inverse transform.
This is achieved by:
- first, changing the 108x4 element modulo_three_table into a 108 element
table (kind of base4), and accessing each value using mask and shifts.
- then, checking low bits for 0 (as they represent the presence of higher
frequency coefficients)
Also provide x86 SIMD code for the DC-only inverse transform.
Signed-off-by: Kostya Shishkov <kostya.shishkov@gmail.com>
Diffstat (limited to 'libavcodec/x86/rv34dsp.asm')
-rw-r--r-- | libavcodec/x86/rv34dsp.asm | 55 |
1 files changed, 55 insertions, 0 deletions
;******************************************************************************
;* MMX/SSE2-optimized functions for the RV30 and RV40 decoders
;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

; Syntax: NASM/Yasm (Intel operand order), using the x86inc.asm abstraction
; layer for argument loading and register naming (r0, r1, r1d, ...).

%include "x86inc.asm"
%include "x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; IDCT_DC_NOROUND reg
;   reg = (reg * 13*13*3) >> 11      (arithmetic shift, no rounding term)
; In/Out: %1 = general-purpose register holding the sign-extended DC value
; Clobb:  flags
;------------------------------------------------------------------------------
%macro IDCT_DC_NOROUND 1
    imul    %1, 13*13*3
    sar     %1, 11
%endmacro

;------------------------------------------------------------------------------
; IDCT_DC_ROUND reg
;   reg = (reg * 13*13 + 0x200) >> 10
;   0x200 is half of 1 << 10, so the arithmetic shift rounds to nearest.
; In/Out: %1 = general-purpose register holding the sign-extended DC value
; Clobb:  flags
;------------------------------------------------------------------------------
%macro IDCT_DC_ROUND 1
    imul    %1, 13*13
    add     %1, 0x200
    sar     %1, 10
%endmacro

;------------------------------------------------------------------------------
; rv34_idct_dequant4x4_dc suffix
;   Emits the function rv34_idct_dequant4x4_<suffix>_mmx2.
;   The scaling applied to the DC value is whatever IDCT_DC is %define'd to
;   at the point where this macro is instantiated.
;
; In:    r0 = pointer to 16-bit coefficients; the word at [r0] is the DC value
; Out:   the scaled DC value, replicated into 4 words, is stored as 8 bytes at
;        byte offsets 0, 16, 32 and 48 from r0
;        (presumably the four rows of a 4x4 block with a 16-byte row stride --
;        TODO confirm against the C caller)
; Clobb: r1, mm0, flags
; NOTE(review): no emms is issued here; presumably the caller is responsible
;               for restoring the x87/MMX state -- confirm.
;------------------------------------------------------------------------------
%macro rv34_idct_dequant4x4_dc 1
cglobal rv34_idct_dequant4x4_%1_mmx2, 1, 2, 0
    movsx   r1, word [r0]           ; sign-extend the 16-bit DC coefficient
    IDCT_DC r1                      ; scale it (rounding or non-rounding variant)
    ; Use the 32-bit view of r1: only the low word is consumed by pshufw, and
    ; r1d keeps this a plain 32-bit movd on x86-64 instead of handing movd a
    ; 64-bit GPR operand (which needs the REX.W/movq encoding).
    movd    mm0, r1d
    pshufw  mm0, mm0, 0             ; broadcast word 0 to all four words of mm0
    movq    [r0+ 0], mm0
    movq    [r0+16], mm0
    movq    [r0+32], mm0
    movq    [r0+48], mm0
    REP_RET
%endmacro

INIT_MMX
; Rounding variant: rv34_idct_dequant4x4_dc_mmx2
%define IDCT_DC IDCT_DC_ROUND
rv34_idct_dequant4x4_dc dc
; Non-rounding variant: rv34_idct_dequant4x4_dc_noround_mmx2
%define IDCT_DC IDCT_DC_NOROUND
rv34_idct_dequant4x4_dc dc_noround