diff options
author | James Almer <jamrial@gmail.com> | 2024-06-15 21:00:17 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2024-06-15 21:02:06 -0300 |
commit | 8a4c9d6bd31f56f588f74f3f06fc78769392fbdd (patch) | |
tree | afe95f42783cd50876e211f6c2d799af4a83d7d0 /libswscale | |
parent | f6859cade3a8e13956873df345e7472f9c78eea6 (diff) | |
download | ffmpeg-8a4c9d6bd31f56f588f74f3f06fc78769392fbdd.tar.gz |
swscale/x86/range_convert: reduce amount of xmm regs clobbered in luma functions
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/x86/range_convert.asm | 20 |
1 file changed, 10 insertions, 10 deletions
diff --git a/libswscale/x86/range_convert.asm b/libswscale/x86/range_convert.asm
index ae51e9d573..572364df50 100644
--- a/libswscale/x86/range_convert.asm
+++ b/libswscale/x86/range_convert.asm
@@ -52,21 +52,21 @@ SECTION .text
 ;-----------------------------------------------------------------------------
 
 %macro LUMCONVERTRANGE 4
-cglobal %1, 2, 2, 7, dst, width
+cglobal %1, 2, 2, 5, dst, width
     shl          widthd, 1
-    VBROADCASTI128   m4, [%2]
-    VBROADCASTI128   m5, [%3]
-    pxor             m6, m6
+    VBROADCASTI128   m2, [%2]
+    VBROADCASTI128   m3, [%3]
+    pxor             m4, m4
     add            dstq, widthq
     neg          widthq
 .loop:
     movu             m0, [dstq+widthq]
-    punpckhwd        m1, m0, m6
-    punpcklwd        m0, m6
-    pmaddwd          m0, m4
-    pmaddwd          m1, m4
-    paddd            m0, m5
-    paddd            m1, m5
+    punpckhwd        m1, m0, m4
+    punpcklwd        m0, m4
+    pmaddwd          m0, m2
+    pmaddwd          m1, m2
+    paddd            m0, m3
+    paddd            m1, m3
     psrad            m0, %4
     psrad            m1, %4
     packssdw         m0, m1