diff options
author | Clément Bœsch <u@pkh.me> | 2017-03-20 22:28:38 +0100 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2017-03-20 22:35:07 +0100 |
commit | 83cd80d10aebd1bde7310ab3d058134d0642a6bb (patch) | |
tree | ce0aa14de18b3c8745924d2fa88a2927c85d6c3d /libavcodec/x86/audiodsp.asm | |
parent | bbc3bde14f1402a68c64a28edc347464554589cb (diff) | |
parent | 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5 (diff) | |
download | ffmpeg-83cd80d10aebd1bde7310ab3d058134d0642a6bb.tar.gz |
Merge commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5'
* commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5':
  audiodsp/x86: yasmify vector_clipf_sse
  audiodsp: reorder arguments for vector_clipf
Merged the version from Libav after a discussion with James Almer on
IRC:
19:22 <ubitux> jamrial: opinion on 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5?
19:23 <ubitux> it was apparently yasmified differently
19:23 <ubitux> (it depends on the previous commit arg shuffle)
19:24 <ubitux> i don't see the magic movsxdifnidn in your port btw
19:24 <ubitux> it's a port from 1d36defe94c7d7ebf995d4dbb4f878d06272f9c6
19:25 <jamrial> seems better thanks to said arg shuffle
19:25 <jamrial> the loop is the same, but init is simpler
19:25 <jamrial> probably worth merging
19:25 <ubitux> OK
19:25 <ubitux> thanks
19:26 <jamrial> curious they didn't make len ptrdiff_t after the previous bunch of commits, heh
19:26 <ubitux> yeah indeed
Both commits are merged at the same time to prevent a conflict with our
existing yasmified ff_vector_clipf_sse.
Merged-by: Clément Bœsch <u@pkh.me>
Diffstat (limited to 'libavcodec/x86/audiodsp.asm')
-rw-r--r-- | libavcodec/x86/audiodsp.asm | 81 |
1 files changed, 40 insertions, 41 deletions
```diff
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3eeb6fd67f..8ef2a8c680 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
 
-;-----------------------------------------------------
-;void ff_vector_clipf(float *dst, const float *src,
-; float min, float max, int len)
-;-----------------------------------------------------
+; void ff_vector_clipf_sse(float *dst, const float *src,
+; int len, float min, float max)
 INIT_XMM sse
-%if UNIX64
-cglobal vector_clipf, 3,3,6, dst, src, len
-%else
-cglobal vector_clipf, 5,5,6, dst, src, min, max, len
-%endif
-%if WIN64
-    SWAP 0, 2
-    SWAP 1, 3
-%elif ARCH_X86_32
-    movss m0, minm
-    movss m1, maxm
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+%if ARCH_X86_32
+    VBROADCASTSS m0, minm
+    VBROADCASTSS m1, maxm
+%elif WIN64
+    VBROADCASTSS m0, m3
+    VBROADCASTSS m1, maxm
+%else ; 64bit sysv
+    VBROADCASTSS m0, m0
+    VBROADCASTSS m1, m1
 %endif
-    SPLATD m0
-    SPLATD m1
-    shl lend, 2
-    add srcq, lenq
-    add dstq, lenq
-    neg lenq
-.loop:
-    mova m2, [srcq+lenq+mmsize*0]
-    mova m3, [srcq+lenq+mmsize*1]
-    mova m4, [srcq+lenq+mmsize*2]
-    mova m5, [srcq+lenq+mmsize*3]
-    maxps m2, m0
-    maxps m3, m0
-    maxps m4, m0
-    maxps m5, m0
-    minps m2, m1
-    minps m3, m1
-    minps m4, m1
-    minps m5, m1
-    mova [dstq+lenq+mmsize*0], m2
-    mova [dstq+lenq+mmsize*1], m3
-    mova [dstq+lenq+mmsize*2], m4
-    mova [dstq+lenq+mmsize*3], m5
-    add lenq, mmsize*4
-    jl .loop
-    REP_RET
+
+    movsxdifnidn lenq, lend
+
+.loop
+    mova m2, [srcq + 4 * lenq - 4 * mmsize]
+    mova m3, [srcq + 4 * lenq - 3 * mmsize]
+    mova m4, [srcq + 4 * lenq - 2 * mmsize]
+    mova m5, [srcq + 4 * lenq - 1 * mmsize]
+
+    maxps m2, m0
+    maxps m3, m0
+    maxps m4, m0
+    maxps m5, m0
+
+    minps m2, m1
+    minps m3, m1
+    minps m4, m1
+    minps m5, m1
+
+    mova [dstq + 4 * lenq - 4 * mmsize], m2
+    mova [dstq + 4 * lenq - 3 * mmsize], m3
+    mova [dstq + 4 * lenq - 2 * mmsize], m4
+    mova [dstq + 4 * lenq - 1 * mmsize], m5
+
+    sub lenq, mmsize
+    jg .loop
+
+    RET
```