about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>, 2024-06-05 22:47:17 +0200
committer: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>, 2024-06-09 12:03:47 +0200
commit: c1c35380a7de98f97a5000ae51be1bce3bc73144 (patch)
tree: 8415c6f520bd5173ec2f006834c6347bf7544129
parent: f7305eb3b3e8f5f4d2061cb1126040769e5aacf6 (diff)
download: ffmpeg-c1c35380a7de98f97a5000ae51be1bce3bc73144.tar.gz
swscale/x86/rgb2rgb: Don't unnecessarily check for inline ASM
The SSE2 and AVX versions of deinterleaveBytes are external ASM. Move them out of the inline ASM template.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
-rw-r--r-- libswscale/x86/rgb2rgb.c | 48
-rw-r--r-- libswscale/x86/rgb2rgb_template.c | 30
2 files changed, 36 insertions, 42 deletions
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index b325e5dbd5..be6f5abc95 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -100,13 +100,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
#define RENAME(a) a ## _sse2
#include "rgb2rgb_template.c"
-//AVX versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_AVX
-#define COMPILE_TEMPLATE_AVX 1
-#define RENAME(a) a ## _avx
-#include "rgb2rgb_template.c"
-
/*
RGB15->RGB16 original by Strepto/Astral
ported to gcc & bugfixed : A'rpi
@@ -138,6 +131,33 @@ void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int lumStride, int chromStride, int srcStride);
#endif
+#define DEINTERLEAVE_BYTES(cpuext) \
+void ff_nv12ToUV_ ## cpuext(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *unused, \
+ const uint8_t *src1, \
+ const uint8_t *src2, \
+ int w, \
+ uint32_t *unused2, \
+ void *opq); \
+static void deinterleave_bytes_ ## cpuext(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, \
+ int width, int height, int srcStride, \
+ int dst1Stride, int dst2Stride) \
+{ \
+ for (int h = 0; h < height; h++) { \
+ ff_nv12ToUV_ ## cpuext(dst1, dst2, NULL, src, NULL, width, NULL, NULL); \
+ src += srcStride; \
+ dst1 += dst1Stride; \
+ dst2 += dst2Stride; \
+ } \
+}
+
+#if HAVE_SSE2_EXTERNAL
+DEINTERLEAVE_BYTES(sse2)
+#endif
+#if HAVE_AVX_EXTERNAL
+DEINTERLEAVE_BYTES(avx)
+#endif
+
av_cold void rgb2rgb_init_x86(void)
{
int cpu_flags = av_get_cpu_flags();
@@ -147,18 +167,19 @@ av_cold void rgb2rgb_init_x86(void)
rgb2rgb_init_mmxext();
if (INLINE_SSE2(cpu_flags))
rgb2rgb_init_sse2();
- if (INLINE_AVX(cpu_flags))
- rgb2rgb_init_avx();
#endif /* HAVE_INLINE_ASM */
if (EXTERNAL_MMXEXT(cpu_flags)) {
shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext;
}
+#if HAVE_SSE2_EXTERNAL
if (EXTERNAL_SSE2(cpu_flags)) {
#if ARCH_X86_64
uyvytoyuv422 = ff_uyvytoyuv422_sse2;
#endif
+ deinterleaveBytes = deinterleave_bytes_sse2;
}
+#endif
if (EXTERNAL_SSSE3(cpu_flags)) {
shuffle_bytes_0321 = ff_shuffle_bytes_0321_ssse3;
shuffle_bytes_2103 = ff_shuffle_bytes_2103_ssse3;
@@ -166,16 +187,19 @@ av_cold void rgb2rgb_init_x86(void)
shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
}
+#if HAVE_AVX_EXTERNAL
+ if (EXTERNAL_AVX(cpu_flags)) {
+ deinterleaveBytes = deinterleave_bytes_avx;
#if ARCH_X86_64
+ uyvytoyuv422 = ff_uyvytoyuv422_avx;
+ }
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2;
shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2;
shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2;
shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
- }
- if (EXTERNAL_AVX(cpu_flags)) {
- uyvytoyuv422 = ff_uyvytoyuv422_avx;
+#endif
}
#endif
}
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index e4e884827c..5c73fa4e16 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1816,31 +1816,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
}
#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */
-#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
-void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *unused,
- const uint8_t *src1,
- const uint8_t *src2,
- int w,
- uint32_t *unused2,
- void *opq);
-static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
- int width, int height, int srcStride,
- int dst1Stride, int dst2Stride)
-{
- int h;
-
- for (h = 0; h < height; h++) {
- RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL, NULL);
- src += srcStride;
- dst1 += dst1Stride;
- dst2 += dst2Stride;
- }
-}
-#endif /* COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM */
-#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
-
#if !COMPILE_TEMPLATE_SSE2
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
uint8_t *dst1, uint8_t *dst2,
@@ -2441,9 +2416,4 @@ static av_cold void RENAME(rgb2rgb_init)(void)
#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2
interleaveBytes = RENAME(interleaveBytes);
#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */
-#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
- deinterleaveBytes = RENAME(deinterleaveBytes);
-#endif
-#endif
}