| author | Michael Niedermayer <michaelni@gmx.at> | 2014-05-30 00:59:15 +0200 |
| --- | --- | --- |
| committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-30 00:59:15 +0200 |
| commit | 40f3a87c10d5773eb66e09f4ed3d8197b1840863 (patch) | |
| tree | bee50efb5beaff032de1fc4dcd797e3000b30c4c /libavcodec/x86/dsputil.asm | |
| parent | 5c6e94c42bcdc026071855a6b1749406b2456c8b (diff) | |
| parent | 054013a0fc6f2b52c60cee3e051be8cc7f82cef3 (diff) | |
| download | ffmpeg-40f3a87c10d5773eb66e09f4ed3d8197b1840863.tar.gz | |
Merge commit '054013a0fc6f2b52c60cee3e051be8cc7f82cef3'
* commit '054013a0fc6f2b52c60cee3e051be8cc7f82cef3':
dsputil: Move APE-specific bits into apedsp
Conflicts:
libavcodec/arm/int_neon.S
libavcodec/x86/dsputil.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/dsputil.asm')
| -rw-r--r-- | libavcodec/x86/dsputil.asm | 127 |
1 file changed, 0 insertions, 127 deletions
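
For orientation before the diff: the code removed here implements ff_scalarproduct_and_madd_int16(), the APE-specific routine this commit relocates to apedsp. Going by the prototype comments preserved in the diff, it returns the dot product of v1 and v2 while updating v1 in place with mul times v3. A minimal C sketch of that operation (the signature matches the diff; the loop body is an illustrative scalar equivalent, not FFmpeg's own reference code):

```c
#include <stdint.h>
#include <stdio.h>

/* Scalar sketch of ff_scalarproduct_and_madd_int16(): accumulate the
 * dot product of v1 and v2, and update v1 in place with
 * v1[i] += mul * v3[i].  Signature taken from the diff below; the body
 * is an illustrative equivalent, not FFmpeg's actual C fallback. */
static int32_t scalarproduct_and_madd_int16(int16_t *v1, const int16_t *v2,
                                            const int16_t *v3,
                                            int order, int mul)
{
    int32_t res = 0;
    while (order--) {
        res   += *v1 * *v2++;   /* dot product uses the pre-update v1 */
        *v1++ += mul * *v3++;   /* then the in-place multiply-add */
    }
    return res;
}

int main(void)
{
    int16_t v1[4] = { 1, 2, 3, 4 };
    int16_t v2[4] = { 5, 6, 7, 8 };
    int16_t v3[4] = { 1, 1, 1, 1 };
    /* 1*5 + 2*6 + 3*7 + 4*8 = 70; afterwards v1[] = {3, 4, 5, 6} */
    printf("%d\n", scalarproduct_and_madd_int16(v1, v2, v3, 4, 2));
    return 0;
}
```

The SIMD versions in the diff follow the same order: the pmaddwd accumulation reads the old contents of v1 before the pmullw/paddw result is stored back.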
```diff
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index eae71ab28c..2209c52541 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -53,45 +53,6 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
     emms
 %endif
     RET
-
-; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
-;                                     int order, int mul)
-cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
-    shl orderq, 1
-    movd    m7, mulm
-%if mmsize == 16
-    pshuflw m7, m7, 0
-    punpcklqdq m7, m7
-%else
-    pshufw  m7, m7, 0
-%endif
-    pxor    m6, m6
-    add     v1q, orderq
-    add     v2q, orderq
-    add     v3q, orderq
-    neg     orderq
-.loop:
-    movu    m0, [v2q + orderq]
-    movu    m1, [v2q + orderq + mmsize]
-    mova    m4, [v1q + orderq]
-    mova    m5, [v1q + orderq + mmsize]
-    movu    m2, [v3q + orderq]
-    movu    m3, [v3q + orderq + mmsize]
-    pmaddwd m0, m4
-    pmaddwd m1, m5
-    pmullw  m2, m7
-    pmullw  m3, m7
-    paddd   m6, m0
-    paddd   m6, m1
-    paddw   m2, m4
-    paddw   m3, m5
-    mova    [v1q + orderq], m2
-    mova    [v1q + orderq + mmsize], m3
-    add     orderq, mmsize*2
-    jl .loop
-    HADDD   m6, m0
-    movd    eax, m6
-    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -99,94 +60,6 @@ SCALARPRODUCT
 INIT_XMM sse2
 SCALARPRODUCT
 
-%macro SCALARPRODUCT_LOOP 1
-align 16
-.loop%1:
-    sub     orderq, mmsize*2
-%if %1
-    mova    m1, m4
-    mova    m4, [v2q + orderq]
-    mova    m0, [v2q + orderq + mmsize]
-    palignr m1, m0, %1
-    palignr m0, m4, %1
-    mova    m3, m5
-    mova    m5, [v3q + orderq]
-    mova    m2, [v3q + orderq + mmsize]
-    palignr m3, m2, %1
-    palignr m2, m5, %1
-%else
-    mova    m0, [v2q + orderq]
-    mova    m1, [v2q + orderq + mmsize]
-    mova    m2, [v3q + orderq]
-    mova    m3, [v3q + orderq + mmsize]
-%endif
-    %define t0  [v1q + orderq]
-    %define t1  [v1q + orderq + mmsize]
-%if ARCH_X86_64
-    mova    m8, t0
-    mova    m9, t1
-    %define t0  m8
-    %define t1  m9
-%endif
-    pmaddwd m0, t0
-    pmaddwd m1, t1
-    pmullw  m2, m7
-    pmullw  m3, m7
-    paddw   m2, t0
-    paddw   m3, t1
-    paddd   m6, m0
-    paddd   m6, m1
-    mova    [v1q + orderq], m2
-    mova    [v1q + orderq + mmsize], m3
-    jg .loop%1
-%if %1
-    jmp .end
-%endif
-%endmacro
-
-; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
-;                                     int order, int mul)
-INIT_XMM ssse3
-cglobal scalarproduct_and_madd_int16, 4,5,10, v1, v2, v3, order, mul
-    shl orderq, 1
-    movd    m7, mulm
-    pshuflw m7, m7, 0
-    punpcklqdq m7, m7
-    pxor    m6, m6
-    mov     r4d, v2d
-    and     r4d, 15
-    and     v2q, ~15
-    and     v3q, ~15
-    mova    m4, [v2q + orderq]
-    mova    m5, [v3q + orderq]
-    ; linear is faster than branch tree or jump table, because the branches taken are cyclic (i.e. predictable)
-    cmp     r4d, 0
-    je .loop0
-    cmp     r4d, 2
-    je .loop2
-    cmp     r4d, 4
-    je .loop4
-    cmp     r4d, 6
-    je .loop6
-    cmp     r4d, 8
-    je .loop8
-    cmp     r4d, 10
-    je .loop10
-    cmp     r4d, 12
-    je .loop12
-SCALARPRODUCT_LOOP 14
-SCALARPRODUCT_LOOP 12
-SCALARPRODUCT_LOOP 10
-SCALARPRODUCT_LOOP 8
-SCALARPRODUCT_LOOP 6
-SCALARPRODUCT_LOOP 4
-SCALARPRODUCT_LOOP 2
-SCALARPRODUCT_LOOP 0
-.end:
-    HADDD   m6, m0
-    movd    eax, m6
-    RET
-
 ;-----------------------------------------------------------------------------
 ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
```
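
A note on the removed SSSE3 path, for readers comparing it with the plain SSE2 loop above it: palignr takes its byte shift as an immediate, so the SCALARPRODUCT_LOOP macro stamps out one loop body per possible misalignment of v2/v3 (offsets 0 through 14 in steps of 2, since the elements are int16), and the prologue masks both pointers down to 16-byte alignment before dispatching on the offset kept in r4d. The in-code comment explains the straight cmp/je chain: the offsets this function sees in practice recur cyclically, so the branches stay predictable and a linear compare sequence beats a branch tree or jump table.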