diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-11-03 13:38:38 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-11-03 13:38:38 +0100 |
commit | 1dad4867146007486d6e84e59454734d1fc804c5 (patch) | |
tree | 84d90dde2ff8ec5c8bec330aab43f05dd5025ade | |
parent | 1c755f76676eeaebbeb8763390c679a9b735c334 (diff) | |
parent | 9ce02e14f01de50fcc6f7f459544b140be66d615 (diff) | |
download | ffmpeg-1dad4867146007486d6e84e59454734d1fc804c5.tar.gz |
Merge commit '9ce02e14f01de50fcc6f7f459544b140be66d615'
* commit '9ce02e14f01de50fcc6f7f459544b140be66d615':
x86: ac3dsp: port to cpuflags
x86util: Add cpuflags_mmxext alias for cpuflags_mmx2
x86inc: Only define program_name if the macro is unset
Conflicts:
libavcodec/x86/ac3dsp.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 84 | ||||
-rw-r--r-- | libavutil/x86/x86inc.asm | 4 | ||||
-rw-r--r-- | libavutil/x86/x86util.asm | 2 |
3 files changed, 47 insertions, 43 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 2edf0f0adc..d97d8d3b0b 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -41,8 +41,8 @@ SECTION .text ; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) ;----------------------------------------------------------------------------- -%macro AC3_EXPONENT_MIN 1 -cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset +%macro AC3_EXPONENT_MIN 0 +cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset shl reuse_blksq, 8 jz .end LOOP_ALIGN @@ -65,16 +65,17 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset %define PMINUB PMINUB_MMX %define LOOP_ALIGN -INIT_MMX -AC3_EXPONENT_MIN mmx +INIT_MMX mmx +AC3_EXPONENT_MIN %if HAVE_MMXEXT_EXTERNAL %define PMINUB PMINUB_MMXEXT %define LOOP_ALIGN ALIGN 16 -AC3_EXPONENT_MIN mmxext +INIT_MMX mmxext +AC3_EXPONENT_MIN %endif %if HAVE_SSE2_EXTERNAL -INIT_XMM -AC3_EXPONENT_MIN sse2 +INIT_XMM sse2 +AC3_EXPONENT_MIN %endif %undef PMINUB %undef LOOP_ALIGN @@ -168,8 +169,8 @@ AC3_MAX_MSB_ABS_INT16 or_abs ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() ;----------------------------------------------------------------------------- -%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set -cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift +%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction, instruction set +cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift movd m0, shiftd .loop: mova m1, [srcq ] @@ -195,19 +196,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift ; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) ;----------------------------------------------------------------------------- -INIT_MMX -AC3_SHIFT l, 16, psllw, mmx -INIT_XMM -AC3_SHIFT l, 16, psllw, sse2 +INIT_MMX mmx +AC3_SHIFT l, 16, psllw +INIT_XMM sse2 +AC3_SHIFT l, 16, psllw ;----------------------------------------------------------------------------- ; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) ;----------------------------------------------------------------------------- -INIT_MMX -AC3_SHIFT r, 32, psrad, mmx -INIT_XMM -AC3_SHIFT r, 32, psrad, sse2 +INIT_MMX mmx +AC3_SHIFT r, 32, psrad +INIT_XMM sse2 +AC3_SHIFT r, 32, psrad ;----------------------------------------------------------------------------- ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) @@ -215,8 +216,8 @@ AC3_SHIFT r, 32, psrad, sse2 ; The 3DNow! version is not bit-identical because pf2id uses truncation rather ; than round-to-nearest. -INIT_MMX -cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len +INIT_MMX 3dnow +cglobal float_to_fixed24, 3, 3, 0, dst, src, len movq m0, [pf_1_24] .loop: movq m1, [srcq ] @@ -242,8 +243,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len femms RET -INIT_XMM -cglobal float_to_fixed24_sse, 3,3,3, dst, src, len +INIT_XMM sse +cglobal float_to_fixed24, 3, 3, 3, dst, src, len movaps m0, [pf_1_24] .loop: movaps m1, [srcq ] @@ -267,8 +268,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len emms RET -INIT_XMM -cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len +INIT_XMM sse2 +cglobal float_to_fixed24, 3, 3, 9, dst, src, len movaps m0, [pf_1_24] .loop: movaps m1, [srcq ] @@ -332,8 +333,8 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len paddd %1, %2 %endmacro -INIT_XMM -cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum +INIT_XMM sse2 +cglobal ac3_compute_mantissa_size, 1, 2, 4, mant_cnt, sum movdqa m0, [mant_cntq ] movdqa m1, [mant_cntq+ 1*16] paddw m0, [mant_cntq+ 2*16] @@ -373,20 +374,20 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum ; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs) ;------------------------------------------------------------------------------ -%macro PABSD_MMX 2 ; src/dst, tmp +%macro PABSD 1-2 ; src/dst, unused +%if cpuflag(ssse3) + pabsd %1, %1 +%else ; src/dst, tmp pxor %2, %2 pcmpgtd %2, %1 pxor %1, %2 psubd %1, %2 -%endmacro - -%macro PABSD_SSSE3 1-2 ; src/dst, unused - pabsd %1, %1 +%endif %endmacro %if HAVE_AMD3DNOW_EXTERNAL -INIT_MMX -cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len +INIT_MMX 3dnow +cglobal ac3_extract_exponents, 3, 3, 0, exp, coef, len add expq, lenq lea coefq, [coefq+4*lenq] neg lenq @@ -395,8 +396,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len .loop: movq m0, [coefq+4*lenq ] movq m1, [coefq+4*lenq+8] - PABSD_MMX m0, m2 - PABSD_MMX m1, m2 + PABSD m0, m2 + PABSD m1, m2 pslld m0, 1 por m0, m3 pi2fd m2, m0 @@ -420,8 +421,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len REP_RET %endif -%macro AC3_EXTRACT_EXPONENTS 1 -cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len +%macro AC3_EXTRACT_EXPONENTS 0 +cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len add expq, lenq lea coefq, [coefq+4*lenq] neg lenq @@ -453,11 +454,10 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len %endmacro %if HAVE_SSE2_EXTERNAL -INIT_XMM -%define PABSD PABSD_MMX -AC3_EXTRACT_EXPONENTS sse2 -%if HAVE_SSSE3_EXTERNAL -%define PABSD PABSD_SSSE3 -AC3_EXTRACT_EXPONENTS ssse3 +INIT_XMM sse2 +AC3_EXTRACT_EXPONENTS %endif +%if HAVE_SSSE3_EXTERNAL +INIT_XMM ssse3 +AC3_EXTRACT_EXPONENTS %endif diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index c1827fbbad..01d7655a17 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -34,7 +34,9 @@ ; as this feature might be useful for others as well. Send patches or ideas ; to x264-devel@videolan.org . -%define program_name ff +%ifndef program_name + %define program_name ff +%endif %define WIN64 0 %define UNIX64 0 diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 74abcbecab..89e8cd82f4 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -23,6 +23,8 @@ ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;****************************************************************************** +%define cpuflags_mmxext cpuflags_mmx2 + %include "libavutil/x86/x86inc.asm" %macro SBUTTERFLY 4 |