diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-10-15 14:19:33 -0700 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-10-22 10:35:14 -0700 |
commit | 6cacecdca3b5c9fc769bf404fa19ef3597209e46 (patch) | |
tree | 33ef7a2a18ffde5855616817a3fadc86402cb844 /libswscale/x86/swscale_template.c | |
parent | 7fbbf9529397756a31850fe37036f026f34f80fc (diff) | |
download | ffmpeg-6cacecdca3b5c9fc769bf404fa19ef3597209e46.tar.gz |
swscale: make yuv2yuvX_10_sse2/avx 8/9/16-bits aware.
Also implement MMX/MMX2 versions and SSE4 versions.
Diffstat (limited to 'libswscale/x86/swscale_template.c')
-rw-r--r-- | libswscale/x86/swscale_template.c | 206 |
1 file changed, 0 insertions, 206 deletions
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index ccf4f7491f..a0381e40f5 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -35,41 +35,6 @@ #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) -#define YSCALEYUV2YV12X(offset, dest, end, pos) \ - __asm__ volatile(\ - "movq "DITHER16"+0(%0), %%mm3 \n\t"\ - "movq "DITHER16"+8(%0), %%mm4 \n\t"\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".p2align 4 \n\t" /* FIXME Unroll? */\ - "1: \n\t"\ - "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ - "movq (%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\ - "movq 8(%%"REG_S", %3, 2), %%mm5 \n\t" /* srcData */\ - "add $16, %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "pmulhw %%mm0, %%mm2 \n\t"\ - "pmulhw %%mm0, %%mm5 \n\t"\ - "paddw %%mm2, %%mm3 \n\t"\ - "paddw %%mm5, %%mm4 \n\t"\ - " jnz 1b \n\t"\ - "psraw $3, %%mm3 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "packuswb %%mm4, %%mm3 \n\t"\ - MOVNTQ(%%mm3, (%1, %3))\ - "add $8, %3 \n\t"\ - "cmp %2, %3 \n\t"\ - "movq "DITHER16"+0(%0), %%mm3 \n\t"\ - "movq "DITHER16"+8(%0), %%mm4 \n\t"\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "r" (&c->redDither),\ - "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\ - : "%"REG_d, "%"REG_S\ - ); - #if !COMPILE_TEMPLATE_MMX2 static av_always_inline void dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot) @@ -106,175 +71,6 @@ dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot) } #endif -static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest[4], int dstW, int chrDstW) -{ - uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], - *aDest = CONFIG_SWSCALE_ALPHA ? 
dest[3] : NULL; - const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8; - - if (uDest) { - x86_reg uv_off = c->uv_off_byte >> 1; - dither_8to16(c, chrDither, 0); - YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) - dither_8to16(c, chrDither, 1); - YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) - } - dither_8to16(c, lumDither, 0); - if (CONFIG_SWSCALE_ALPHA && aDest) { - YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) - } - - YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0) -} - -#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \ - __asm__ volatile(\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "movq "DITHER32"+0(%0), %%mm4 \n\t"\ - "movq "DITHER32"+8(%0), %%mm5 \n\t"\ - "movq "DITHER32"+16(%0), %%mm6 \n\t"\ - "movq "DITHER32"+24(%0), %%mm7 \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - ".p2align 4 \n\t"\ - "1: \n\t"\ - "movq (%%"REG_S", %3, 2), %%mm0 \n\t" /* srcData */\ - "movq 8(%%"REG_S", %3, 2), %%mm2 \n\t" /* srcData */\ - "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ - "movq (%%"REG_S", %3, 2), %%mm1 \n\t" /* srcData */\ - "movq %%mm0, %%mm3 \n\t"\ - "punpcklwd %%mm1, %%mm0 \n\t"\ - "punpckhwd %%mm1, %%mm3 \n\t"\ - "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm3 \n\t"\ - "paddd %%mm0, %%mm4 \n\t"\ - "paddd %%mm3, %%mm5 \n\t"\ - "movq 8(%%"REG_S", %3, 2), %%mm3 \n\t" /* srcData */\ - "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ - "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ - "test %%"REG_S", %%"REG_S" \n\t"\ - "movq %%mm2, %%mm0 \n\t"\ - "punpcklwd %%mm3, %%mm2 \n\t"\ - "punpckhwd %%mm3, %%mm0 \n\t"\ - "pmaddwd %%mm1, %%mm2 \n\t"\ - "pmaddwd %%mm1, %%mm0 \n\t"\ - "paddd %%mm2, %%mm6 \n\t"\ - "paddd %%mm0, %%mm7 \n\t"\ - " jnz 1b \n\t"\ - "psrad $16, %%mm4 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ - "packssdw %%mm5, 
%%mm4 \n\t"\ - "packssdw %%mm7, %%mm6 \n\t"\ - "paddw %%mm0, %%mm4 \n\t"\ - "paddw %%mm0, %%mm6 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "psraw $3, %%mm6 \n\t"\ - "packuswb %%mm6, %%mm4 \n\t"\ - MOVNTQ(%%mm4, (%1, %3))\ - "add $8, %3 \n\t"\ - "cmp %2, %3 \n\t"\ - "lea " offset "(%0), %%"REG_d" \n\t"\ - "movq "DITHER32"+0(%0), %%mm4 \n\t"\ - "movq "DITHER32"+8(%0), %%mm5 \n\t"\ - "movq "DITHER32"+16(%0), %%mm6 \n\t"\ - "movq "DITHER32"+24(%0), %%mm7 \n\t"\ - "mov (%%"REG_d"), %%"REG_S" \n\t"\ - "jb 1b \n\t"\ - :: "r" (&c->redDither),\ - "r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos))\ - : "%"REG_a, "%"REG_d, "%"REG_S\ - ); - -#if !COMPILE_TEMPLATE_MMX2 -static av_always_inline void -dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot) -{ - if (rot) { - __asm__ volatile("pxor %%mm0, %%mm0\n\t" - "movq (%0), %%mm4\n\t" - "movq %%mm4, %%mm5\n\t" - "psrlq $24, %%mm4\n\t" - "psllq $40, %%mm5\n\t" - "por %%mm5, %%mm4\n\t" - "movq %%mm4, %%mm6\n\t" - "punpcklbw %%mm0, %%mm4\n\t" - "punpckhbw %%mm0, %%mm6\n\t" - "movq %%mm4, %%mm5\n\t" - "movq %%mm6, %%mm7\n\t" - "punpcklwd %%mm0, %%mm4\n\t" - "punpckhwd %%mm0, %%mm5\n\t" - "punpcklwd %%mm0, %%mm6\n\t" - "punpckhwd %%mm0, %%mm7\n\t" - "pslld $12, %%mm4\n\t" - "pslld $12, %%mm5\n\t" - "pslld $12, %%mm6\n\t" - "pslld $12, %%mm7\n\t" - "movq %%mm4, "DITHER32"+0(%1)\n\t" - "movq %%mm5, "DITHER32"+8(%1)\n\t" - "movq %%mm6, "DITHER32"+16(%1)\n\t" - "movq %%mm7, "DITHER32"+24(%1)\n\t" - :: "r"(srcDither), "r"(&c->redDither) - ); - } else { - __asm__ volatile("pxor %%mm0, %%mm0\n\t" - "movq (%0), %%mm4\n\t" - "movq %%mm4, %%mm6\n\t" - "punpcklbw %%mm0, %%mm4\n\t" - "punpckhbw %%mm0, %%mm6\n\t" - "movq %%mm4, %%mm5\n\t" - "movq %%mm6, %%mm7\n\t" - "punpcklwd %%mm0, %%mm4\n\t" - "punpckhwd %%mm0, %%mm5\n\t" - "punpcklwd %%mm0, %%mm6\n\t" - "punpckhwd %%mm0, %%mm7\n\t" - "pslld $12, %%mm4\n\t" - "pslld $12, %%mm5\n\t" - "pslld $12, %%mm6\n\t" - "pslld $12, %%mm7\n\t" - "movq %%mm4, "DITHER32"+0(%1)\n\t" - "movq %%mm5, 
"DITHER32"+8(%1)\n\t" - "movq %%mm6, "DITHER32"+16(%1)\n\t" - "movq %%mm7, "DITHER32"+24(%1)\n\t" - :: "r"(srcDither), "r"(&c->redDither) - ); - } -} -#endif - -static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest[4], int dstW, int chrDstW) -{ - uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], - *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; - const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8; - - if (uDest) { - x86_reg uv_off = c->uv_off_byte >> 1; - dither_8to32(c, chrDither, 0); - YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) - dither_8to32(c, chrDither, 1); - YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) - } - dither_8to32(c, lumDither, 0); - if (CONFIG_SWSCALE_ALPHA && aDest) { - YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) - } - - YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0) -} - static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, @@ -2104,7 +1900,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) if (!(c->flags & SWS_BITEXACT)) { if (c->flags & SWS_ACCURATE_RND) { //c->yuv2yuv1 = RENAME(yuv2yuv1_ar ); - //c->yuv2yuvX = RENAME(yuv2yuvX_ar ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break; @@ -2117,7 +1912,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } else { //c->yuv2yuv1 = RENAME(yuv2yuv1 ); - //c->yuv2yuvX = RENAME(yuv2yuvX ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; |