aboutsummaryrefslogtreecommitdiffstats
path: root/libswscale
diff options
context:
space:
mode:
author: Wu Jianhua <jianhua.wu@intel.com> 2021-09-30 09:56:09 +0800
committer: Paul B Mahol <onemda@gmail.com> 2021-10-15 10:59:20 +0200
commit: 2c734a84964c6f6896194b0a3175124897468eac (patch)
tree: 8b6238e329f457cdd984c4d2cbf528aed3ff2cc6 /libswscale
parent: 767f162432760b1c8e0354e50f40dcaa3a36c69e (diff)
download: ffmpeg-2c734a84964c6f6896194b0a3175124897468eac.tar.gz
libswscale/x86/rgb2rgb: add shuffle_bytes avx2
Performance data (less is better):
    shuffle_bytes_ssse3    3.64654
    shuffle_bytes_avx2     0.94288

Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
Diffstat (limited to 'libswscale')
-rw-r--r-- libswscale/x86/rgb2rgb.c 17
-rw-r--r-- libswscale/x86/rgb_2_rgb.asm 11
2 files changed, 26 insertions, 2 deletions
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index c38a953277..0ab139aca4 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -146,6 +146,12 @@ void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size)
void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
#if ARCH_X86_64
+void ff_shuffle_bytes_2103_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_0321_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_1230_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_3012_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_3210_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+
void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
const uint8_t *src, int width, int height,
int lumStride, int chromStride, int srcStride);
@@ -186,9 +192,16 @@ av_cold void rgb2rgb_init_x86(void)
shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
}
- if (EXTERNAL_AVX(cpu_flags)) {
#if ARCH_X86_64
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2;
+ shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2;
+ shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2;
+ shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2;
+ shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) {
uyvytoyuv422 = ff_uyvytoyuv422_avx;
-#endif
}
+#endif
}
diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
index 29b856e281..c695c61d5c 100644
--- a/libswscale/x86/rgb_2_rgb.asm
+++ b/libswscale/x86/rgb_2_rgb.asm
@@ -159,6 +159,17 @@ SHUFFLE_BYTES 1, 2, 3, 0
SHUFFLE_BYTES 3, 0, 1, 2
SHUFFLE_BYTES 3, 2, 1, 0
+%if ARCH_X86_64
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+SHUFFLE_BYTES 2, 1, 0, 3
+SHUFFLE_BYTES 0, 3, 2, 1
+SHUFFLE_BYTES 1, 2, 3, 0
+SHUFFLE_BYTES 3, 0, 1, 2
+SHUFFLE_BYTES 3, 2, 1, 0
+%endif
+%endif
+
;-----------------------------------------------------------------------------------------------
; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
; const uint8_t *src, int width, int height,