diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-05-14 15:56:39 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-05-15 15:23:59 -0400 |
commit | 95a98ab3f0439df82a907233f80a7404b987e838 (patch) | |
tree | a574f9bb10b094dbee0b834fdddcef41b7d1fe18 /libavcodec/x86 | |
parent | 11e33402ca0ed949241133aef1959f8d937982a5 (diff) | |
download | ffmpeg-95a98ab3f0439df82a907233f80a7404b987e838.tar.gz |
ac3dsp: simplify x86 versions of ac3_max_msb_abs_int16
Simplifies the code by using cpuflags and a new macro.
Also fixes the invalid use of the MMX2 pshufw operation in the MMX-only
function.
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 59 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp_mmx.c | 10 |
2 files changed, 43 insertions, 26 deletions
```diff
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 1438811fc7..f1e73d375a 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -91,12 +91,36 @@ AC3_EXPONENT_MIN sse2
 ; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
 ;-----------------------------------------------------------------------------
 
-%macro AC3_MAX_MSB_ABS_INT16 2
-cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
+; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word
+%macro OR_WORDS_HORIZ 2 ; src, tmp
+%if cpuflag(sse2)
+    movhlps     %2, %1
+    por         %1, %2
+    pshuflw     %2, %1, q0032
+    por         %1, %2
+    pshuflw     %2, %1, q0001
+    por         %1, %2
+%elif cpuflag(mmx2)
+    pshufw      %2, %1, q0032
+    por         %1, %2
+    pshufw      %2, %1, q0001
+    por         %1, %2
+%else ; mmx
+    movq        %2, %1
+    psrlq       %2, 32
+    por         %1, %2
+    movq        %2, %1
+    psrlq       %2, 16
+    por         %1, %2
+%endif
+%endmacro
+
+%macro AC3_MAX_MSB_ABS_INT16 1
+cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
     pxor        m2, m2
     pxor        m3, m3
 .loop:
-%ifidn %2, min_max
+%ifidn %1, min_max
     mova        m0, [srcq]
     mova        m1, [srcq+mmsize]
     pminsw      m2, m0
@@ -104,7 +128,7 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
     pmaxsw      m3, m0
     pmaxsw      m3, m1
 %else ; or_abs
-%ifidn %1, mmx
+%if notcpuflag(ssse3)
     mova        m0, [srcq]
     mova        m1, [srcq+mmsize]
     ABS2        m0, m1, m3, m4
@@ -119,34 +143,27 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
     add       srcq, mmsize*2
     sub       lend, mmsize
     ja .loop
-%ifidn %2, min_max
+%ifidn %1, min_max
     ABS2        m2, m3, m0, m1
     por         m2, m3
 %endif
-%ifidn mmsize, 16
-    movhlps     m0, m2
-    por         m2, m0
-%endif
-    PSHUFLW     m0, m2, 0xe
-    por         m2, m0
-    PSHUFLW     m0, m2, 0x1
-    por         m2, m0
+    OR_WORDS_HORIZ m2, m0
     movd       eax, m2
     and        eax, 0xFFFF
     RET
 %endmacro
 
-INIT_MMX
+INIT_MMX mmx
 %define ABS2 ABS2_MMX
-%define PSHUFLW pshufw
-AC3_MAX_MSB_ABS_INT16 mmx, or_abs
+AC3_MAX_MSB_ABS_INT16 or_abs
+INIT_MMX mmx2
 %define ABS2 ABS2_MMX2
-AC3_MAX_MSB_ABS_INT16 mmxext, min_max
-INIT_XMM
-%define PSHUFLW pshuflw
-AC3_MAX_MSB_ABS_INT16 sse2, min_max
+AC3_MAX_MSB_ABS_INT16 min_max
+INIT_XMM sse2
+AC3_MAX_MSB_ABS_INT16 min_max
+INIT_XMM ssse3
 %define ABS2 ABS2_SSSE3
-AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
+AC3_MAX_MSB_ABS_INT16 or_abs
 
 ;-----------------------------------------------------------------------------
 ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index d6bb469457..1a43183aa0 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -27,10 +27,10 @@ extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int n
 extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
 
-extern int ff_ac3_max_msb_abs_int16_mmx   (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_sse2  (const int16_t *src, int len);
-extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_mmx  (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
 
 extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
 extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
@@ -67,7 +67,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
     }
     if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
     }
     if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
         c->float_to_fixed24 = ff_float_to_fixed24_sse;
```