author     Ronald S. Bultje <rsbultje@gmail.com>    2012-01-08 16:37:43 -0800
committer  Ronald S. Bultje <rsbultje@gmail.com>    2012-01-08 16:37:43 -0800
commit     2170a0e6add6bb0a6fbdf689b82361c21d9b72be (patch)
tree       8eb78b7d78b23e0e8b9d9b04fc5c07f954f0a000
parent     599855748a93dc8bdaf5a5b3ac681e9a7026bc04 (diff)
download   ffmpeg-2170a0e6add6bb0a6fbdf689b82361c21d9b72be.tar.gz
swscale: convert yuy2/uyvy/nv12/nv21ToY/UV from inline asm to yasm.
Also implement SSE2/AVX variants.
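
For orientation, here is a minimal scalar sketch (not part of the patch) of what the new packed-to-planar luma routines compute, assuming the usual 4:2:2 byte layouts (YUYV = Y0 U0 Y1 V0 ..., UYVY = U0 Y0 V0 Y1 ...); the SSE2/AVX code below does the same work 16 pixels at a time with pand/psrlw plus packuswb:

    #include <stdint.h>

    static void yuyv_to_y_ref(uint8_t *dst, const uint8_t *src, int w)
    {
        /* luma sits in the even bytes of YUYV */
        for (int i = 0; i < w; i++)
            dst[i] = src[2 * i];
    }

    static void uyvy_to_y_ref(uint8_t *dst, const uint8_t *src, int w)
    {
        /* luma sits in the odd bytes of UYVY */
        for (int i = 0; i < w; i++)
            dst[i] = src[2 * i + 1];
    }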
-rw-r--r--  libswscale/Makefile                  3
-rw-r--r--  libswscale/x86/input.asm           242
-rw-r--r--  libswscale/x86/swscale_mmx.c        83
-rw-r--r--  libswscale/x86/swscale_template.c  163
4 files changed, 327 insertions, 164 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 89182573f0..bef4200c59 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -17,7 +17,8 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \
x86/swscale_mmx.o \
x86/yuv2rgb_mmx.o
OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o
-MMX-OBJS-$(HAVE_YASM) += x86/output.o \
+MMX-OBJS-$(HAVE_YASM) += x86/input.o \
+ x86/output.o \
x86/scale.o
TESTPROGS = colorspace swscale
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
new file mode 100644
index 0000000000..4bdb575765
--- /dev/null
+++ b/libswscale/x86/input.asm
@@ -0,0 +1,242 @@
+;******************************************************************************
+;* x86-optimized input routines; does shuffling of packed
+;* YUV formats into individual planes, and converts RGB
+;* into YUV planes also.
+;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
+;
+; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w);
+; and
+; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src,
+; const uint8_t *unused, int w);
+;-----------------------------------------------------------------------------
+
+; %1 = a (aligned) or u (unaligned)
+; %2 = yuyv or uyvy
+%macro LOOP_YUYV_TO_Y 2
+.loop_%1:
+ mov%1 m0, [srcq+wq*2] ; (byte) { Y0, U0, Y1, V0, ... }
+ mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
+%ifidn %2, yuyv
+ pand m0, m2 ; (word) { Y0, Y1, ..., Y7 }
+ pand m1, m2 ; (word) { Y8, Y9, ..., Y15 }
+%else ; uyvy
+ psrlw m0, 8 ; (word) { Y0, Y1, ..., Y7 }
+ psrlw m1, 8 ; (word) { Y8, Y9, ..., Y15 }
+%endif ; yuyv/uyvy
+ packuswb m0, m1 ; (byte) { Y0, ..., Y15 }
+ mova [dstq+wq], m0
+ add wq, mmsize
+ jl .loop_%1
+ REP_RET
+%endmacro
+
+; %1 = nr. of XMM registers
+; %2 = yuyv or uyvy
+; %3 = if specified, it means that unaligned and aligned code in loop
+; will be the same (i.e. YUYV+AVX), and thus we don't need to
+; split the loop in an aligned and unaligned case
+%macro YUYV_TO_Y_FN 2-3
+cglobal %2ToY, 3, 3, %1, dst, src, w
+%ifdef ARCH_X86_64
+ movsxd wq, wd
+%endif
+ add dstq, wq
+%if mmsize == 16
+ test srcq, 15
+%endif
+ lea srcq, [srcq+wq*2]
+%ifidn %2, yuyv
+ pcmpeqb m2, m2 ; (byte) { 0xff } x 16
+ psrlw m2, 8 ; (word) { 0x00ff } x 8
+%endif ; yuyv
+%if mmsize == 16
+ jnz .loop_u_start
+ neg wq
+ LOOP_YUYV_TO_Y a, %2
+.loop_u_start:
+ neg wq
+ LOOP_YUYV_TO_Y u, %2
+%else ; mmsize == 8
+ neg wq
+ LOOP_YUYV_TO_Y a, %2
+%endif ; mmsize == 8/16
+%endmacro
+
+; %1 = a (aligned) or u (unaligned)
+; %2 = yuyv or uyvy
+%macro LOOP_YUYV_TO_UV 2
+.loop_%1:
+%ifidn %2, yuyv
+ mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... }
+ mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
+ psrlw m0, 8 ; (word) { U0, V0, ..., U3, V3 }
+ psrlw m1, 8 ; (word) { U4, V4, ..., U7, V7 }
+%else ; uyvy
+%if cpuflag(avx)
+ vpand m0, m2, [srcq+wq*4] ; (word) { U0, V0, ..., U3, V3 }
+ vpand m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 }
+%else
+ mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... }
+ mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... }
+ pand m0, m2 ; (word) { U0, V0, ..., U3, V3 }
+ pand m1, m2 ; (word) { U4, V4, ..., U7, V7 }
+%endif
+%endif ; yuyv/uyvy
+ packuswb m0, m1 ; (byte) { U0, V0, ..., U7, V7 }
+ pand m1, m0, m2 ; (word) { U0, U1, ..., U7 }
+ psrlw m0, 8 ; (word) { V0, V1, ..., V7 }
+%if mmsize == 16
+ packuswb m1, m0 ; (byte) { U0, ... U7, V1, ... V7 }
+ movh [dstUq+wq], m1
+ movhps [dstVq+wq], m1
+%else ; mmsize == 8
+ packuswb m1, m1 ; (byte) { U0, ... U3 }
+ packuswb m0, m0 ; (byte) { V0, ... V3 }
+ movh [dstUq+wq], m1
+ movh [dstVq+wq], m0
+%endif ; mmsize == 8/16
+ add wq, mmsize / 2
+ jl .loop_%1
+ REP_RET
+%endmacro
+
+; %1 = nr. of XMM registers
+; %2 = yuyv or uyvy
+; %3 = if specified, it means that unaligned and aligned code in loop
+; will be the same (i.e. UYVY+AVX), and thus we don't need to
+; split the loop in an aligned and unaligned case
+%macro YUYV_TO_UV_FN 2-3
+cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
+%ifdef ARCH_X86_64
+ movsxd wq, r4m
+%else ; x86-32
+ mov wq, r4m
+%endif
+ add dstUq, wq
+ add dstVq, wq
+%if mmsize == 16 && %0 == 2
+ test srcq, 15
+%endif
+ lea srcq, [srcq+wq*4]
+ pcmpeqb m2, m2 ; (byte) { 0xff } x 16
+ psrlw m2, 8 ; (word) { 0x00ff } x 8
+ ; NOTE: if uyvy+avx, u/a are identical
+%if mmsize == 16 && %0 == 2
+ jnz .loop_u_start
+ neg wq
+ LOOP_YUYV_TO_UV a, %2
+.loop_u_start:
+ neg wq
+ LOOP_YUYV_TO_UV u, %2
+%else ; mmsize == 8
+ neg wq
+ LOOP_YUYV_TO_UV a, %2
+%endif ; mmsize == 8/16
+%endmacro
+
+; %1 = a (aligned) or u (unaligned)
+; %2 = nv12 or nv21
+%macro LOOP_NVXX_TO_UV 2
+.loop_%1:
+ mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... }
+ mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... }
+ pand m2, m0, m4 ; (word) { U0, U1, ..., U7 }
+ pand m3, m1, m4 ; (word) { U8, U9, ..., U15 }
+ psrlw m0, 8 ; (word) { V0, V1, ..., V7 }
+ psrlw m1, 8 ; (word) { V8, V9, ..., V15 }
+ packuswb m2, m3 ; (byte) { U0, ..., U15 }
+ packuswb m0, m1 ; (byte) { V0, ..., V15 }
+%ifidn %2, nv12
+ mova [dstUq+wq], m2
+ mova [dstVq+wq], m0
+%else ; nv21
+ mova [dstVq+wq], m2
+ mova [dstUq+wq], m0
+%endif ; nv12/21
+ add wq, mmsize
+ jl .loop_%1
+ REP_RET
+%endmacro
+
+; %1 = nr. of XMM registers
+; %2 = nv12 or nv21
+%macro NVXX_TO_UV_FN 2
+cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
+%ifdef ARCH_X86_64
+ movsxd wq, r4m
+%else ; x86-32
+ mov wq, r4m
+%endif
+ add dstUq, wq
+ add dstVq, wq
+%if mmsize == 16
+ test srcq, 15
+%endif
+ lea srcq, [srcq+wq*2]
+ pcmpeqb m4, m4 ; (byte) { 0xff } x 16
+ psrlw m4, 8 ; (word) { 0x00ff } x 8
+%if mmsize == 16
+ jnz .loop_u_start
+ neg wq
+ LOOP_NVXX_TO_UV a, %2
+.loop_u_start:
+ neg wq
+ LOOP_NVXX_TO_UV u, %2
+%else ; mmsize == 8
+ neg wq
+ LOOP_NVXX_TO_UV a, %2
+%endif ; mmsize == 8/16
+%endmacro
+
+%ifdef ARCH_X86_32
+INIT_MMX mmx
+YUYV_TO_Y_FN 0, yuyv
+YUYV_TO_Y_FN 0, uyvy
+YUYV_TO_UV_FN 0, yuyv
+YUYV_TO_UV_FN 0, uyvy
+NVXX_TO_UV_FN 0, nv12
+NVXX_TO_UV_FN 0, nv21
+%endif
+
+INIT_XMM sse2
+YUYV_TO_Y_FN 3, yuyv
+YUYV_TO_Y_FN 2, uyvy
+YUYV_TO_UV_FN 3, yuyv
+YUYV_TO_UV_FN 3, uyvy
+NVXX_TO_UV_FN 5, nv12
+NVXX_TO_UV_FN 5, nv21
+
+INIT_XMM avx
+; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but
+; that's not faster in practice
+YUYV_TO_UV_FN 3, yuyv
+YUYV_TO_UV_FN 3, uyvy, 1
+NVXX_TO_UV_FN 5, nv12
+NVXX_TO_UV_FN 5, nv21
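
A note on the loop structure shared by the macros above: the destination and source pointers are advanced to the end of the row, w is negated, and the loop runs while the index is still negative (the jl after the add), which avoids a separate counter register. A hedged C analogue, shown here for the NV12/NV21 chroma deinterleave (names are illustrative, not from the patch):

    #include <stdint.h>
    #include <stddef.h>

    static void nv12_to_uv_ref(uint8_t *dstU, uint8_t *dstV,
                               const uint8_t *src, int w)
    {
        /* point everything one row-length past the start ... */
        dstU += w;
        dstV += w;
        src  += 2 * (size_t)w;
        /* ... and walk the index up from -w to 0 */
        for (ptrdiff_t i = -(ptrdiff_t)w; i < 0; i++) {
            dstU[i] = src[2 * i];     /* NV12: U in the even bytes */
            dstV[i] = src[2 * i + 1]; /* V in the odd bytes; NV21 swaps these stores */
        }
    }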
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index 4305cef41d..867a9f1244 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -244,6 +244,26 @@ VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);
+#define INPUT_UV_FUNC(fmt, opt) \
+extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *src, const uint8_t *unused1, \
+ int w, uint32_t *unused2)
+#define INPUT_FUNC(fmt, opt) \
+extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
+ int w, uint32_t *unused); \
+ INPUT_UV_FUNC(fmt, opt)
+#define INPUT_FUNCS(opt) \
+ INPUT_FUNC(uyvy, opt); \
+ INPUT_FUNC(yuyv, opt); \
+ INPUT_UV_FUNC(nv12, opt); \
+ INPUT_UV_FUNC(nv21, opt)
+
+#if ARCH_X86_32
+INPUT_FUNCS(mmx);
+#endif
+INPUT_FUNCS(sse2);
+INPUT_FUNCS(avx);
+
void ff_sws_init_swScale_mmx(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -296,6 +316,30 @@ switch(c->dstBpc){ \
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
+
+ switch (c->srcFormat) {
+ case PIX_FMT_Y400A:
+ c->lumToYV12 = ff_yuyvToY_mmx;
+ if (c->alpPixBuf)
+ c->alpToYV12 = ff_uyvyToY_mmx;
+ break;
+ case PIX_FMT_YUYV422:
+ c->lumToYV12 = ff_yuyvToY_mmx;
+ c->chrToYV12 = ff_yuyvToUV_mmx;
+ break;
+ case PIX_FMT_UYVY422:
+ c->lumToYV12 = ff_uyvyToY_mmx;
+ c->chrToYV12 = ff_uyvyToUV_mmx;
+ break;
+ case PIX_FMT_NV12:
+ c->chrToYV12 = ff_nv12ToUV_mmx;
+ break;
+ case PIX_FMT_NV21:
+ c->chrToYV12 = ff_nv21ToUV_mmx;
+ break;
+ default:
+ break;
+ }
}
if (cpu_flags & AV_CPU_FLAG_MMX2) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2,);
@@ -314,6 +358,28 @@ switch(c->dstBpc){ \
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2,);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
+
+ switch (c->srcFormat) {
+ case PIX_FMT_Y400A:
+ c->lumToYV12 = ff_yuyvToY_sse2;
+ if (c->alpPixBuf)
+ c->alpToYV12 = ff_uyvyToY_sse2;
+ break;
+ case PIX_FMT_YUYV422:
+ c->lumToYV12 = ff_yuyvToY_sse2;
+ c->chrToYV12 = ff_yuyvToUV_sse2;
+ break;
+ case PIX_FMT_UYVY422:
+ c->lumToYV12 = ff_uyvyToY_sse2;
+ c->chrToYV12 = ff_uyvyToUV_sse2;
+ break;
+ case PIX_FMT_NV12:
+ c->chrToYV12 = ff_nv12ToUV_sse2;
+ break;
+ case PIX_FMT_NV21:
+ c->chrToYV12 = ff_nv21ToUV_sse2;
+ break;
+ }
}
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
@@ -332,6 +398,23 @@ switch(c->dstBpc){ \
if (cpu_flags & AV_CPU_FLAG_AVX) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
+
+ switch (c->srcFormat) {
+ case PIX_FMT_YUYV422:
+ c->chrToYV12 = ff_yuyvToUV_avx;
+ break;
+ case PIX_FMT_UYVY422:
+ c->chrToYV12 = ff_uyvyToUV_avx;
+ break;
+ case PIX_FMT_NV12:
+ c->chrToYV12 = ff_nv12ToUV_avx;
+ break;
+ case PIX_FMT_NV21:
+ c->chrToYV12 = ff_nv21ToUV_avx;
+ break;
+ default:
+ break;
+ }
}
#endif
}
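
For reference, a sketch of what one of the prototype macros above produces, assuming plain preprocessor token pasting: INPUT_FUNC(yuyv, sse2) declares the two yasm entry points that the SSE2 branch assigns to c->lumToYV12 and c->chrToYV12:

    extern void ff_yuyvToY_sse2(uint8_t *dst, const uint8_t *src,
                                int w, uint32_t *unused);
    extern void ff_yuyvToUV_sse2(uint8_t *dstU, uint8_t *dstV,
                                 const uint8_t *src, const uint8_t *unused1,
                                 int w, uint32_t *unused2);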
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 5e7df5c4a0..b3d7336588 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1361,147 +1361,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
}
}
-#if !COMPILE_TEMPLATE_MMX2
-//FIXME yuy2* can read up to 7 samples too much
-
-static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm2 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
- "pand %%mm2, %%mm0 \n\t"
- "pand %%mm2, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
- : "%"REG_a
- );
-}
-
-static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm4 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",4), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "psrlw $8, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- "movd %%mm0, (%3, %%"REG_a") \n\t"
- "movd %%mm1, (%2, %%"REG_a") \n\t"
- "add $4, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
- : "%"REG_a
- );
- assert(src1 == src2);
-}
-
-/* This is almost identical to the previous, end exists only because
- * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "psrlw $8, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
- : "%"REG_a
- );
-}
-
-static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm4 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",4), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
- "pand %%mm4, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "psrlw $8, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- "movd %%mm0, (%3, %%"REG_a") \n\t"
- "movd %%mm1, (%2, %%"REG_a") \n\t"
- "add $4, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
- : "%"REG_a
- );
- assert(src1 == src2);
-}
-
-static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
- const uint8_t *src, int width)
-{
- __asm__ volatile(
- "movq "MANGLE(bm01010101)", %%mm4 \n\t"
- "mov %0, %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_a",2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "pand %%mm4, %%mm0 \n\t"
- "pand %%mm4, %%mm1 \n\t"
- "psrlw $8, %%mm2 \n\t"
- "psrlw $8, %%mm3 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "movq %%mm2, (%3, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
- : "%"REG_a
- );
-}
-
-static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- RENAME(nvXXtoUV)(dstU, dstV, src1, width);
-}
-
-static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
- int width, uint32_t *unused)
-{
- RENAME(nvXXtoUV)(dstV, dstU, src1, width);
-}
-#endif /* !COMPILE_TEMPLATE_MMX2 */
-
static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src,
int width, enum PixelFormat srcFormat)
{
@@ -1856,15 +1715,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
#endif /* COMPILE_TEMPLATE_MMX2 */
}
-#if !COMPILE_TEMPLATE_MMX2
- switch(srcFormat) {
- case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
- case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
- case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
- case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
- default: break;
- }
-#endif /* !COMPILE_TEMPLATE_MMX2 */
if (!c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
@@ -1874,21 +1724,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
switch (srcFormat) {
-#if !COMPILE_TEMPLATE_MMX2
- case PIX_FMT_YUYV422 :
- case PIX_FMT_Y400A : c->lumToYV12 = RENAME(yuy2ToY); break;
- case PIX_FMT_UYVY422 : c->lumToYV12 = RENAME(uyvyToY); break;
-#endif /* !COMPILE_TEMPLATE_MMX2 */
case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
default: break;
}
-#if !COMPILE_TEMPLATE_MMX2
- if (c->alpPixBuf) {
- switch (srcFormat) {
- case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break;
- default: break;
- }
- }
-#endif /* !COMPILE_TEMPLATE_MMX2 */
}