diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2011-02-10 12:20:36 -0500 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-10 15:32:47 -0500 |
commit | dda3f0ef48aa5c3b03566b60b6bf63211e1fe579 (patch) | |
tree | 0adb6558e2581beb7019d89998dd8d7ba210be65 | |
parent | b4668274b944abae61759e796c5cc36ade510f24 (diff) | |
download | ffmpeg-dda3f0ef48aa5c3b03566b60b6bf63211e1fe579.tar.gz |
Add x86-optimized versions of exponent_min().
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
-rw-r--r-- | libavcodec/Makefile | 6 | ||||
-rw-r--r-- | libavcodec/ac3dsp.c | 51 | ||||
-rw-r--r-- | libavcodec/ac3dsp.h | 43 | ||||
-rw-r--r-- | libavcodec/ac3enc.c | 33 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 4 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 67 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp_mmx.c | 45 | ||||
-rw-r--r-- | libavcodec/x86/x86util.asm | 10 |
8 files changed, 230 insertions, 29 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index fa6c0bb9cd..682b626800 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -55,8 +55,10 @@ OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \ mpeg4audio.o OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3dec_data.o ac3.o -OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3tab.o ac3.o -OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o +OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3tab.o ac3.o \ + ac3dsp.o +OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3tab.o ac3.o \ + ac3dsp.o OBJS-$(CONFIG_ALAC_DECODER) += alac.o OBJS-$(CONFIG_ALAC_ENCODER) += alacenc.o OBJS-$(CONFIG_ALS_DECODER) += alsdec.o bgmc.o mpeg4audio.o diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c new file mode 100644 index 0000000000..f688e6a72b --- /dev/null +++ b/libavcodec/ac3dsp.c @@ -0,0 +1,51 @@ +/* + * AC-3 DSP utils + * Copyright (c) 2011 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" +#include "ac3dsp.h" + +static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs) +{ + int blk, i; + + if (!num_reuse_blocks) + return; + + for (i = 0; i < nb_coefs; i++) { + uint8_t min_exp = *exp; + uint8_t *exp1 = exp + 256; + for (blk = 0; blk < num_reuse_blocks; blk++) { + uint8_t next_exp = *exp1; + if (next_exp < min_exp) + min_exp = next_exp; + exp1 += 256; + } + *exp++ = min_exp; + } +} + +av_cold void ff_ac3dsp_init(AC3DSPContext *c) +{ + c->ac3_exponent_min = ac3_exponent_min_c; + + if (HAVE_MMX) + ff_ac3dsp_init_x86(c); +} diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h new file mode 100644 index 0000000000..7f13b11f3b --- /dev/null +++ b/libavcodec/ac3dsp.h @@ -0,0 +1,43 @@ +/* + * AC-3 DSP utils + * Copyright (c) 2011 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_AC3DSP_H +#define AVCODEC_AC3DSP_H + +#include <stdint.h> + +typedef struct AC3DSPContext { + /** + * Set each encoded exponent in a block to the minimum of itself and the + * exponents in the same frequency bin of up to 5 following blocks. + * @param exp pointer to the start of the current block of exponents. + * constraints: align 16 + * @param num_reuse_blocks number of blocks that will reuse exponents from the current block. + * constraints: range 0 to 5 + * @param nb_coefs number of frequency coefficients. + */ + void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs); +} AC3DSPContext; + +void ff_ac3dsp_init (AC3DSPContext *c); +void ff_ac3dsp_init_x86(AC3DSPContext *c); + +#endif /* AVCODEC_AC3DSP_H */ diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index e41a0aec65..851fdc9caf 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -33,6 +33,7 @@ #include "avcodec.h" #include "put_bits.h" #include "dsputil.h" +#include "ac3dsp.h" #include "ac3.h" #include "audioconvert.h" @@ -86,6 +87,7 @@ typedef struct AC3Block { typedef struct AC3EncodeContext { PutBitContext pb; ///< bitstream writer context DSPContext dsp; + AC3DSPContext ac3dsp; ///< AC-3 optimized functions AC3MDCTContext mdct; ///< MDCT context AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info @@ -458,7 +460,6 @@ static void compute_exp_strategy_ch(AC3EncodeContext *s, uint8_t *exp_strategy, exp_strategy[blk] = EXP_REUSE; exp += AC3_MAX_COEFS; } - emms_c(); /* now select the encoding strategy type : if exponents are often recoded, we use a coarse encoding */ @@ -499,31 +500,6 @@ static void compute_exp_strategy(AC3EncodeContext *s) /** - * Set each encoded exponent in a block to the minimum of itself and the - * exponents in 
the same frequency bin of up to 5 following blocks. - */ -static void exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) -{ - int blk, i; - - if (!num_reuse_blocks) - return; - - for (i = 0; i < nb_coefs; i++) { - uint8_t min_exp = *exp; - uint8_t *exp1 = exp + AC3_MAX_COEFS; - for (blk = 0; blk < num_reuse_blocks; blk++) { - uint8_t next_exp = *exp1; - if (next_exp < min_exp) - min_exp = next_exp; - exp1 += AC3_MAX_COEFS; - } - *exp++ = min_exp; - } -} - - -/** * Update the exponents so that they are the ones the decoder will decode. */ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) @@ -616,7 +592,7 @@ static void encode_exponents(AC3EncodeContext *s) num_reuse_blocks = blk1 - blk - 1; /* for the EXP_REUSE case we select the min of the exponents */ - exponent_min(exp, num_reuse_blocks, nb_coefs); + s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs); encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]); @@ -704,6 +680,8 @@ static void process_exponents(AC3EncodeContext *s) encode_exponents(s); group_exponents(s); + + emms_c(); } @@ -1856,6 +1834,7 @@ static av_cold int ac3_encode_init(AVCodecContext *avctx) avctx->coded_frame= avcodec_alloc_frame(); dsputil_init(&s->dsp, avctx); + ff_ac3dsp_init(&s->ac3dsp); return 0; init_fail: diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 83cec00442..1b58fa121f 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -17,6 +17,10 @@ MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o +MMX-OBJS-$(CONFIG_AC3_ENCODER) += x86/ac3dsp_mmx.o +MMX-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += x86/ac3dsp_mmx.o +YASM-OBJS-$(CONFIG_AC3_ENCODER) += x86/ac3dsp.o +YASM-OBJS-$(CONFIG_AC3_FIXED_ENCODER) += x86/ac3dsp.o MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER) += x86/mpegaudiodec_mmx.o MMX-OBJS-$(CONFIG_MP2FLOAT_DECODER) += x86/mpegaudiodec_mmx.o diff --git 
a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm new file mode 100644 index 0000000000..e71c51cf33 --- /dev/null +++ b/libavcodec/x86/ac3dsp.asm @@ -0,0 +1,67 @@ +;***************************************************************************** +;* x86-optimized AC-3 DSP utils +;* Copyright (c) 2011 Justin Ruggles +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION .text + +;----------------------------------------------------------------------------- +; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) +;----------------------------------------------------------------------------- + +%macro AC3_EXPONENT_MIN 1 +cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset + shl reuse_blksq, 8 + jz .end + LOOP_ALIGN +.nextexp: + mov offsetq, reuse_blksq + mova m0, [expq+offsetq] + sub offsetq, 256 + LOOP_ALIGN +.nextblk: + PMINUB m0, [expq+offsetq], m1 + sub offsetq, 256 + jae .nextblk + mova [expq], m0 + add expq, mmsize + sub expnq, mmsize + jg .nextexp +.end: + REP_RET +%endmacro + +%define PMINUB PMINUB_MMX +%define LOOP_ALIGN +INIT_MMX +AC3_EXPONENT_MIN mmx +%ifdef HAVE_MMX2 
+%define PMINUB PMINUB_MMXEXT +%define LOOP_ALIGN ALIGN 16 +AC3_EXPONENT_MIN mmxext +%endif +%ifdef HAVE_SSE +INIT_XMM +AC3_EXPONENT_MIN sse2 +%endif +%undef PMINUB +%undef LOOP_ALIGN diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c new file mode 100644 index 0000000000..7ce3aa358d --- /dev/null +++ b/libavcodec/x86/ac3dsp_mmx.c @@ -0,0 +1,45 @@ +/* + * x86-optimized AC-3 DSP utils + * Copyright (c) 2011 Justin Ruggles + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86_cpu.h" +#include "dsputil_mmx.h" +#include "libavcodec/ac3dsp.h" + +extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs); +extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs); +extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs); + +av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c) +{ + int mm_flags = av_get_cpu_flags(); + +#if HAVE_YASM + if (mm_flags & AV_CPU_FLAG_MMX) { + c->ac3_exponent_min = ff_ac3_exponent_min_mmx; + } + if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) { + c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; + } + if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { + c->ac3_exponent_min = ff_ac3_exponent_min_sse2; + } 
+#endif +} diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm index 7cabc70b31..b28a6198f7 100644 --- a/libavcodec/x86/x86util.asm +++ b/libavcodec/x86/x86util.asm @@ -434,3 +434,13 @@ movh [%7], %3 movh [%7+%8], %4 %endmacro + +%macro PMINUB_MMX 3 ; dst, src, tmp + mova %3, %1 + psubusb %3, %2 + psubb %1, %3 +%endmacro + +%macro PMINUB_MMXEXT 3 ; dst, src, ignored + pminub %1, %2 +%endmacro |