diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-06-30 17:35:13 -0700 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-07-01 09:08:26 -0700 |
commit | 8a8d0ce208b77f506759185ff580fa61b5c41f70 (patch) | |
tree | 544655de68668abe121739fd51ab4b964aa6ca96 /libswscale | |
parent | cdc2c1c57616956d975c57b4b69eb73865f513f5 (diff) | |
download | ffmpeg-8a8d0ce208b77f506759185ff580fa61b5c41f70.tar.gz |
swscale: for >8bit scaling, read in native bit-depth.
For 9/10bit, it means we don't have to upscale to 16bit before
actual scaling or pixel format conversion, and thus a performance
gain.
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/ppc/swscale_altivec.c | 2 | ||||
-rw-r--r-- | libswscale/swscale.c | 99 | ||||
-rw-r--r-- | libswscale/swscale_internal.h | 2 | ||||
-rw-r--r-- | libswscale/utils.c | 1 | ||||
-rw-r--r-- | libswscale/x86/swscale_template.c | 2 |
5 files changed, 27 insertions, 79 deletions
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 6fb3de0ee7..369e93b85a 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -220,7 +220,7 @@ yuv2yuvX_altivec_real(SwsContext *c, } } -static void hScale_altivec_real(int16_t *dst, int dstW, +static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int16_t *filterPos, int filterSize) { diff --git a/libswscale/swscale.c b/libswscale/swscale.c index ef8fc39c48..dacf40ed8a 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1766,59 +1766,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) -// FIXME Maybe dither instead. -static av_always_inline void -yuv9_OR_10ToUV_c_template(uint16_t *dstU, uint16_t *dstV, - const uint16_t *srcU, const uint16_t *srcV, - int width, enum PixelFormat origin, int depth) -{ - int i; - - for (i = 0; i < width; i++) { - int upx = input_pixel(&srcU[i]); - int vpx = input_pixel(&srcV[i]); - dstU[i] = (upx << (16 - depth)) | (upx >> (2 * depth - 16)); - dstV[i] = (vpx << (16 - depth)) | (vpx >> (2 * depth - 16)); - } -} - -static av_always_inline void -yuv9_or_10ToY_c_template(uint16_t *dstY, const uint16_t *srcY, - int width, enum PixelFormat origin, int depth) -{ - int i; - - for (i = 0; i < width; i++) { - int px = input_pixel(&srcY[i]); - dstY[i] = (px << (16 - depth)) | (px >> (2 * depth - 16)); - } -} - -#undef input_pixel - -#define YUV_NBPS(depth, BE_LE, origin) \ -static void BE_LE ## depth ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *_srcU, const uint8_t *_srcV, \ - int width, uint32_t *unused) \ -{ \ - uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ - const uint16_t *srcU = (const uint16_t *) _srcU, \ - *srcV = (const uint16_t *) _srcV; \ - yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \ -} \ -static void BE_LE ## depth ## ToY_c(uint8_t *_dstY, const uint8_t *_srcY, \ - int width, uint32_t *unused) \ -{ \ - uint16_t *dstY = (uint16_t *) _dstY; \ - const uint16_t *srcY = (const uint16_t *) _srcY; \ - yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \ -} - -YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE); -YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE); -YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE); -YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE); - static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) { @@ -1905,13 +1852,15 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, } } -static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src, +static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int16_t *filterPos, int filterSize) { int i; int32_t *dst = (int32_t *) _dst; const uint16_t *src = (const uint16_t *) _src; + int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + int sh = (bits <= 7) ? 11 : (bits - 4); for (i = 0; i < dstW; i++) { int j; @@ -1922,12 +1871,12 @@ static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src, val += src[srcPos + j] * filter[filterSize * i + j]; } // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit - dst[i] = FFMIN(val >> 11, (1 << 19) - 1); + dst[i] = FFMIN(val >> sh, (1 << 19) - 1); } } // bilinear / bicubic scaling -static void hScale_c(int16_t *dst, int dstW, const uint8_t *src, +static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int16_t *filterPos, int filterSize) { @@ -2063,7 +2012,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, } if (!c->hyscale_fast) { - c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize); + c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize); } else { // fast bilinear upscale / crap downscale c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); } @@ -2113,8 +2062,8 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 } if (!c->hcscale_fast) { - c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); - c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize); + c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); + c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize); } else { // fast bilinear upscale / crap downscale c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc); } @@ -2645,21 +2594,21 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_PAL8 : case PIX_FMT_BGR4_BYTE: case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; +#if HAVE_BIGENDIAN case PIX_FMT_YUV444P9LE: - case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break; + case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV444P10LE: - case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break; - case PIX_FMT_YUV444P9BE: - case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break; - case PIX_FMT_YUV444P10BE: - case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break; -#if HAVE_BIGENDIAN + case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; #else + case PIX_FMT_YUV444P9BE: + case PIX_FMT_YUV420P9BE: + case PIX_FMT_YUV444P10BE: + case PIX_FMT_YUV422P10BE: + case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break; @@ -2712,22 +2661,22 @@ static av_cold void sws_init_swScale_c(SwsContext *c) c->lumToYV12 = NULL; c->alpToYV12 = NULL; switch (srcFormat) { +#if HAVE_BIGENDIAN case PIX_FMT_YUV444P9LE: - case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break; + case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break; - case PIX_FMT_YUV444P9BE: - case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break; - case PIX_FMT_YUV444P10BE: - case PIX_FMT_YUV422P10BE: - case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break; -#if HAVE_BIGENDIAN + case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: case PIX_FMT_YUV444P16LE: case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; #else + case PIX_FMT_YUV444P9BE: + case PIX_FMT_YUV420P9BE: + case PIX_FMT_YUV444P10BE: + case PIX_FMT_YUV422P10BE: + case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: case PIX_FMT_YUV444P16BE: diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index cfb19d9c13..e4b93c595b 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -440,7 +440,7 @@ typedef struct SwsContext { * (and input coefficients thus padded with zeroes) * to simplify creating SIMD code. */ - void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, + void (*hScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int16_t *filterPos, int filterSize); diff --git a/libswscale/utils.c b/libswscale/utils.c index b7ccac94ba..eea32a130a 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -877,7 +877,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) } } - // FIXME it's even nicer if bpp isn't 16, but max({src,dst}formatbpp) c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8; if (c->scalingBpp == 16) diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 54c7edc25e..f58ac520e1 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1859,7 +1859,7 @@ static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, #if !COMPILE_TEMPLATE_MMX2 // bilinear / bicubic scaling -static void RENAME(hScale)(int16_t *dst, int dstW, +static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int16_t *filterPos, int filterSize) { |