diff options
author | Clément Bœsch <u@pkh.me> | 2017-03-20 22:28:38 +0100 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2017-03-20 22:35:07 +0100 |
commit | 83cd80d10aebd1bde7310ab3d058134d0642a6bb (patch) | |
tree | ce0aa14de18b3c8745924d2fa88a2927c85d6c3d | |
parent | bbc3bde14f1402a68c64a28edc347464554589cb (diff) | |
parent | 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5 (diff) | |
download | ffmpeg-83cd80d10aebd1bde7310ab3d058134d0642a6bb.tar.gz |
Merge commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5'
* commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5':
  audiodsp/x86: yasmify vector_clipf_sse
  audiodsp: reorder arguments for vector_clipf
Merged the version from Libav after a discussion with James Almer on
IRC:
19:22 <ubitux> jamrial: opinion on 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5?
19:23 <ubitux> it was apparently yasmified differently
19:23 <ubitux> (it depends on the previous commit arg shuffle)
19:24 <ubitux> i don't see the magic movsxdifnidn in your port btw
19:24 <ubitux> it's a port from 1d36defe94c7d7ebf995d4dbb4f878d06272f9c6
19:25 <jamrial> seems better thanks to said arg shuffle
19:25 <jamrial> the loop is the same, but init is simpler
19:25 <jamrial> probably worth merging
19:25 <ubitux> OK
19:25 <ubitux> thanks
19:26 <jamrial> curious they didn't make len ptrdiff_t after the previous bunch of commits, heh
19:26 <ubitux> yeah indeed
Both commits are merged at the same time to prevent a conflict with our
existing yasmified ff_vector_clipf_sse.
Merged-by: Clément Bœsch <u@pkh.me>
-rw-r--r-- | libavcodec/ac3enc_float.c | 2 |
-rw-r--r-- | libavcodec/arm/audiodsp_init_neon.c | 3 |
-rw-r--r-- | libavcodec/arm/audiodsp_neon.S | 5 |
-rw-r--r-- | libavcodec/audiodsp.c | 4 |
-rw-r--r-- | libavcodec/audiodsp.h | 3 |
-rw-r--r-- | libavcodec/cook.c | 2 |
-rw-r--r-- | libavcodec/x86/audiodsp.asm | 81 |
-rw-r--r-- | libavcodec/x86/audiodsp_init.c | 2 |
-rw-r--r-- | tests/checkasm/audiodsp.c | 8 |
9 files changed, 54 insertions, 56 deletions
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index 6c91f459d0..caa210d5ad 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4], static void clip_coefficients(AudioDSPContext *adsp, float *coef, unsigned int len) { - adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); + adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX); } diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c index f7bd162482..6902db86b4 100644 --- a/libavcodec/arm/audiodsp_init_neon.c +++ b/libavcodec/arm/audiodsp_init_neon.c @@ -25,8 +25,7 @@ #include "libavcodec/audiodsp.h" #include "audiodsp_arm.h" -void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, - int len); +void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max); void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S index ab32cef7ab..cea700c84d 100644 --- a/libavcodec/arm/audiodsp_neon.S +++ b/libavcodec/arm/audiodsp_neon.S @@ -24,9 +24,8 @@ function ff_vector_clipf_neon, export=1 VFP vdup.32 q1, d0[1] VFP vdup.32 q0, d0[0] -NOVFP vdup.32 q0, r2 -NOVFP vdup.32 q1, r3 -NOVFP ldr r2, [sp] +NOVFP vdup.32 q0, r3 +NOVFP vld1.32 {d2[],d3[]}, [sp] vld1.f32 {q2},[r1,:128]! vmin.f32 q10, q2, q1 vld1.f32 {q3},[r1,:128]! 
diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c index 85b5a74947..3c7a3a7583 100644 --- a/libavcodec/audiodsp.c +++ b/libavcodec/audiodsp.c @@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src, } } -static void vector_clipf_c(float *dst, const float *src, - float min, float max, int len) +static void vector_clipf_c(float *dst, const float *src, int len, + float min, float max) { int i; diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h index 003a1d0ea9..aa6fa7898b 100644 --- a/libavcodec/audiodsp.h +++ b/libavcodec/audiodsp.h @@ -48,7 +48,8 @@ typedef struct AudioDSPContext { /* assume len is a multiple of 16, and arrays are 16-byte aligned */ void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, - float min, float max, int len /* align 16 */); + int len /* align 16 */, + float min, float max); } AudioDSPContext; void ff_audiodsp_init(AudioDSPContext *c); diff --git a/libavcodec/cook.c b/libavcodec/cook.c index 4488f8eaf7..53cb83852e 100644 --- a/libavcodec/cook.c +++ b/libavcodec/cook.c @@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p, static void saturate_output_float(COOKContext *q, float *out) { q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, - -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); + FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f); } diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index 3eeb6fd67f..8ef2a8c680 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0 VECTOR_CLIP_INT32 6, 1, 0, 0 %endif -;----------------------------------------------------- -;void ff_vector_clipf(float *dst, const float *src, -; float min, float max, int len) -;----------------------------------------------------- +; void ff_vector_clipf_sse(float *dst, const float *src, +; int len, float min, float max) INIT_XMM sse -%if UNIX64 
-cglobal vector_clipf, 3,3,6, dst, src, len -%else -cglobal vector_clipf, 5,5,6, dst, src, min, max, len -%endif -%if WIN64 - SWAP 0, 2 - SWAP 1, 3 -%elif ARCH_X86_32 - movss m0, minm - movss m1, maxm +cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max +%if ARCH_X86_32 + VBROADCASTSS m0, minm + VBROADCASTSS m1, maxm +%elif WIN64 + VBROADCASTSS m0, m3 + VBROADCASTSS m1, maxm +%else ; 64bit sysv + VBROADCASTSS m0, m0 + VBROADCASTSS m1, m1 %endif - SPLATD m0 - SPLATD m1 - shl lend, 2 - add srcq, lenq - add dstq, lenq - neg lenq -.loop: - mova m2, [srcq+lenq+mmsize*0] - mova m3, [srcq+lenq+mmsize*1] - mova m4, [srcq+lenq+mmsize*2] - mova m5, [srcq+lenq+mmsize*3] - maxps m2, m0 - maxps m3, m0 - maxps m4, m0 - maxps m5, m0 - minps m2, m1 - minps m3, m1 - minps m4, m1 - minps m5, m1 - mova [dstq+lenq+mmsize*0], m2 - mova [dstq+lenq+mmsize*1], m3 - mova [dstq+lenq+mmsize*2], m4 - mova [dstq+lenq+mmsize*3], m5 - add lenq, mmsize*4 - jl .loop - REP_RET + + movsxdifnidn lenq, lend + +.loop + mova m2, [srcq + 4 * lenq - 4 * mmsize] + mova m3, [srcq + 4 * lenq - 3 * mmsize] + mova m4, [srcq + 4 * lenq - 2 * mmsize] + mova m5, [srcq + 4 * lenq - 1 * mmsize] + + maxps m2, m0 + maxps m3, m0 + maxps m4, m0 + maxps m5, m0 + + minps m2, m1 + minps m3, m1 + minps m4, m1 + minps m5, m1 + + mova [dstq + 4 * lenq - 4 * mmsize], m2 + mova [dstq + 4 * lenq - 3 * mmsize], m3 + mova [dstq + 4 * lenq - 2 * mmsize], m4 + mova [dstq + 4 * lenq - 1 * mmsize], m5 + + sub lenq, mmsize + jg .loop + + RET diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c index 8f9e604bb2..98e296c264 100644 --- a/libavcodec/x86/audiodsp_init.c +++ b/libavcodec/x86/audiodsp_init.c @@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); void ff_vector_clipf_sse(float *dst, const float *src, - float min, float max, int len); + int len, float min, 
float max); av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) { diff --git a/tests/checkasm/audiodsp.c b/tests/checkasm/audiodsp.c index 1da1d1ea09..7c4e16e40d 100644 --- a/tests/checkasm/audiodsp.c +++ b/tests/checkasm/audiodsp.c @@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void) int i, len; declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src, - float min, float max, unsigned int len); + int len, float min, float max); val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f; val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f; @@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void) len = rnd() % 128; len = 16 * FFMAX(len, 1); - call_ref(dst0, src, min, max, len); - call_new(dst1, src, min, max, len); + call_ref(dst0, src, len, min, max); + call_new(dst1, src, len, min, max); for (i = 0; i < len; i++) { if (!float_near_ulp_array(dst0, dst1, 3, len)) fail(); } - bench_new(dst1, src, min, max, MAX_SIZE); + bench_new(dst1, src, MAX_SIZE, min, max); } report("audiodsp"); |