diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-05-11 19:15:14 -0400 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-05-11 19:15:14 -0400 |
commit | c8f487deae75d4f25c2ec39ab484c1075f909bbd (patch) | |
tree | b396d4df1077aef64b17a794a3224ec540e4c3e3 /libswscale | |
parent | 5705b02079449c685a3dd337fcc3a8b440dca4a0 (diff) | |
download | ffmpeg-c8f487deae75d4f25c2ec39ab484c1075f909bbd.tar.gz |
swscale: fix YUV420P 9/10bit support.
Fix handling of input that is not in native endianness, and add support for
9/10-bit output. This allows us to force the endianness of YUV420P 9/10-bit
in the H264/10-bit FATE tests, which should fix them on big-endian
systems.
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/ppc/swscale_template.c | 4 | ||||
-rw-r--r-- | libswscale/swscale.c | 175 | ||||
-rw-r--r-- | libswscale/swscale_internal.h | 2 | ||||
-rw-r--r-- | libswscale/swscale_template.c | 40 | ||||
-rw-r--r-- | libswscale/utils.c | 4 | ||||
-rw-r--r-- | libswscale/x86/swscale_template.c | 4 |
6 files changed, 173 insertions, 56 deletions
diff --git a/libswscale/ppc/swscale_template.c b/libswscale/ppc/swscale_template.c index 3e40c3f0a6..e69656ca15 100644 --- a/libswscale/ppc/swscale_template.c +++ b/libswscale/ppc/swscale_template.c @@ -639,7 +639,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi - if (is16BPS(dstFormat)) { + if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, @@ -716,7 +716,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi - if (is16BPS(dstFormat)) { + if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 2830f26ce5..b63a3868c5 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -212,10 +212,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, - int dstW, int chrDstW, int big_endian) + int dstW, int chrDstW, int big_endian, int output_bits) { //FIXME Optimize (just quickly written not optimized..) 
int i; + int shift = 11 + 16 - output_bits; for (i = 0; i < dstW; i++) { int val = 1 << 10; @@ -225,9 +226,9 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co val += lumSrc[j][i] * lumFilter[j]; if (big_endian) { - AV_WB16(&dest[i], av_clip_uint16(val >> 11)); + AV_WB16(&dest[i], av_clip_uint16(val >> shift)); } else { - AV_WL16(&dest[i], av_clip_uint16(val >> 11)); + AV_WL16(&dest[i], av_clip_uint16(val >> shift)); } } @@ -243,11 +244,11 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co } if (big_endian) { - AV_WB16(&uDest[i], av_clip_uint16(u >> 11)); - AV_WB16(&vDest[i], av_clip_uint16(v >> 11)); + AV_WB16(&uDest[i], av_clip_uint16(u >> shift)); + AV_WB16(&vDest[i], av_clip_uint16(v >> shift)); } else { - AV_WL16(&uDest[i], av_clip_uint16(u >> 11)); - AV_WL16(&vDest[i], av_clip_uint16(v >> 11)); + AV_WL16(&uDest[i], av_clip_uint16(u >> shift)); + AV_WL16(&vDest[i], av_clip_uint16(v >> shift)); } } } @@ -261,9 +262,9 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co val += alpSrc[j][i] * lumFilter[j]; if (big_endian) { - AV_WB16(&aDest[i], av_clip_uint16(val >> 11)); + AV_WB16(&aDest[i], av_clip_uint16(val >> shift)); } else { - AV_WL16(&aDest[i], av_clip_uint16(val >> 11)); + AV_WL16(&aDest[i], av_clip_uint16(val >> shift)); } } } @@ -274,19 +275,28 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW, enum PixelFormat dstFormat) { - if (isBE(dstFormat)) { - yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, - dest, uDest, vDest, aDest, - dstW, chrDstW, 1); +#define conv16(bits) \ + if (isBE(dstFormat)) { \ + yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrSrc, chrFilterSize, \ + alpSrc, \ + dest, uDest, vDest, aDest, \ + dstW, chrDstW, 1, 
bits); \ + } else { \ + yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrSrc, chrFilterSize, \ + alpSrc, \ + dest, uDest, vDest, aDest, \ + dstW, chrDstW, 0, bits); \ + } + if (is16BPS(dstFormat)) { + conv16(16); + } else if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) { + conv16(9); } else { - yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, - chrFilter, chrSrc, chrFilterSize, - alpSrc, - dest, uDest, vDest, aDest, - dstW, chrDstW, 0); + conv16(10); } +#undef conv16 } static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -1669,25 +1679,124 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ length*=2; fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128); } else { - if(isNBPS(c->srcFormat)) { - const int depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; - uint16_t *srcPtr2 = (uint16_t*)srcPtr; + if(is9_OR_10BPS(c->srcFormat)) { + const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; + const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; + const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; if (is16BPS(c->dstFormat)) { uint16_t *dstPtr2 = (uint16_t*)dstPtr; - for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) - dstPtr2[j] = (srcPtr2[j]<<(16-depth)) | (srcPtr2[j]>>(2*depth-16)); - dstPtr2 += dstStride[plane]/2; - srcPtr2 += srcStride[plane]/2; +#define COPY9_OR_10TO16(rfunc, wfunc) \ + for (i = 0; i < height; i++) { \ + for (j = 0; j < length; j++) { \ + int srcpx = rfunc(&srcPtr2[j]); \ + wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \ + } \ + dstPtr2 += dstStride[plane]/2; \ + srcPtr2 += srcStride[plane]/2; \ + } + if (isBE(c->dstFormat)) { + if (isBE(c->srcFormat)) { + COPY9_OR_10TO16(AV_RB16, AV_WB16); + } else { + COPY9_OR_10TO16(AV_RL16, AV_WB16); + } + } else { + if 
(isBE(c->srcFormat)) { + COPY9_OR_10TO16(AV_RB16, AV_WL16); + } else { + COPY9_OR_10TO16(AV_RL16, AV_WL16); + } + } + } else if (is9_OR_10BPS(c->dstFormat)) { + uint16_t *dstPtr2 = (uint16_t*)dstPtr; +#define COPY9_OR_10TO9_OR_10(loop) \ + for (i = 0; i < height; i++) { \ + for (j = 0; j < length; j++) { \ + loop; \ + } \ + dstPtr2 += dstStride[plane]/2; \ + srcPtr2 += srcStride[plane]/2; \ + } +#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \ + if (dst_depth > src_depth) { \ + COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \ + wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \ + } else if (dst_depth < src_depth) { \ + COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]) >> 1)); \ + } else { \ + COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \ + } + if (isBE(c->dstFormat)) { + if (isBE(c->srcFormat)) { + COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16); + } else { + COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16); + } + } else { + if (isBE(c->srcFormat)) { + COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16); + } else { + COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16); + } } } else { // FIXME Maybe dither instead. 
- for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) - dstPtr[j] = srcPtr2[j]>>(depth-8); - dstPtr += dstStride[plane]; - srcPtr2 += srcStride[plane]/2; +#define COPY9_OR_10TO8(rfunc) \ + for (i = 0; i < height; i++) { \ + for (j = 0; j < length; j++) { \ + dstPtr[j] = rfunc(&srcPtr2[j])>>(src_depth-8); \ + } \ + dstPtr += dstStride[plane]; \ + srcPtr2 += srcStride[plane]/2; \ + } + if (isBE(c->srcFormat)) { + COPY9_OR_10TO8(AV_RB16); + } else { + COPY9_OR_10TO8(AV_RL16); + } + } + } else if(is9_OR_10BPS(c->dstFormat)) { + const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; + + if (is16BPS(c->srcFormat)) { + const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; +#define COPY16TO9_OR_10(rfunc, wfunc) \ + for (i = 0; i < height; i++) { \ + for (j = 0; j < length; j++) { \ + wfunc(&dstPtr2[j], rfunc(&srcPtr2[j])>>(16-dst_depth)); \ + } \ + dstPtr2 += dstStride[plane]/2; \ + srcPtr2 += srcStride[plane]/2; \ + } + if (isBE(c->dstFormat)) { + if (isBE(c->srcFormat)) { + COPY16TO9_OR_10(AV_RB16, AV_WB16); + } else { + COPY16TO9_OR_10(AV_RL16, AV_WB16); + } + } else { + if (isBE(c->srcFormat)) { + COPY16TO9_OR_10(AV_RB16, AV_WL16); + } else { + COPY16TO9_OR_10(AV_RL16, AV_WL16); + } + } + } else /* 8bit */ { +#define COPY8TO9_OR_10(wfunc) \ + for (i = 0; i < height; i++) { \ + for (j = 0; j < length; j++) { \ + const int srcpx = srcPtr[j]; \ + wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \ + } \ + dstPtr2 += dstStride[plane]/2; \ + srcPtr += srcStride[plane]; \ + } + if (isBE(c->dstFormat)) { + COPY8TO9_OR_10(AV_WB16); + } else { + COPY8TO9_OR_10(AV_WL16); } } } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) { diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 2d40215ea9..2369546cb7 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -354,7 +354,7 @@ const char *sws_format_name(enum PixelFormat 
format); || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ ) -#define isNBPS(x) ( \ +#define is9_OR_10BPS(x) ( \ (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV420P10LE \ diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index 81a8d66277..fe872561cb 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -164,6 +164,8 @@ static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) { int i; + // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so + // we need to skip each second pixel. Same for BEToUV. for (i=0; i<width; i++) { dstU[i]= src1[2*i + 1]; dstV[i]= src2[2*i + 1]; @@ -226,8 +228,8 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, } // FIXME Maybe dither instead. -#define YUV_NBPS(depth) \ -static inline void yuv ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ +#define YUV_NBPS(depth, endianness, rfunc) \ +static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ const uint8_t *_srcU, const uint8_t *_srcV, \ long width, uint32_t *unused) \ { \ @@ -235,21 +237,23 @@ static inline void yuv ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ const uint16_t *srcU = (const uint16_t*)_srcU; \ const uint16_t *srcV = (const uint16_t*)_srcV; \ for (i = 0; i < width; i++) { \ - dstU[i] = srcU[i]>>(depth-8); \ - dstV[i] = srcV[i]>>(depth-8); \ + dstU[i] = rfunc(&srcU[i])>>(depth-8); \ + dstV[i] = rfunc(&srcV[i])>>(depth-8); \ } \ } \ \ -static inline void yuv ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \ +static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint8_t *_srcY, long width, uint32_t *unused) \ { \ int i; \ const uint16_t *srcY = (const uint16_t*)_srcY; \ for (i = 0; i < width; i++) \ - dstY[i] = srcY[i]>>(depth-8); \ + dstY[i] = rfunc(&srcY[i])>>(depth-8); \ } \ -YUV_NBPS( 
9) -YUV_NBPS(10) +YUV_NBPS( 9, LE, AV_RL16) +YUV_NBPS( 9, BE, AV_RB16) +YUV_NBPS(10, LE, AV_RL16) +YUV_NBPS(10, BE, AV_RB16) static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) @@ -666,7 +670,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi - if (is16BPS(dstFormat)) { + if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, @@ -743,7 +747,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi - if (is16BPS(dstFormat)) { + if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, @@ -816,10 +820,10 @@ static void sws_init_swScale_c(SwsContext *c) case PIX_FMT_PAL8 : case PIX_FMT_BGR4_BYTE: case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break; - case PIX_FMT_YUV420P9BE: - case PIX_FMT_YUV420P9LE: c->chrToYV12 = yuv9ToUV_c; break; - case PIX_FMT_YUV420P10BE: - case PIX_FMT_YUV420P10LE: c->chrToYV12 = yuv10ToUV_c; break; + case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break; + case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break; + case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break; + case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break; case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: case PIX_FMT_YUV444P16BE: c->chrToYV12 = 
BEToUV_c; break; @@ -866,10 +870,10 @@ static void sws_init_swScale_c(SwsContext *c) c->lumToYV12 = NULL; c->alpToYV12 = NULL; switch (srcFormat) { - case PIX_FMT_YUV420P9BE: - case PIX_FMT_YUV420P9LE: c->lumToYV12 = yuv9ToY_c; break; - case PIX_FMT_YUV420P10BE: - case PIX_FMT_YUV420P10LE: c->lumToYV12 = yuv10ToY_c; break; + case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break; + case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break; + case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break; + case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break; case PIX_FMT_YUYV422 : case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: diff --git a/libswscale/utils.c b/libswscale/utils.c index 4f9f269731..818d014159 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -143,9 +143,13 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt) || (x)==PIX_FMT_GRAY8 \ || (x)==PIX_FMT_YUV410P \ || (x)==PIX_FMT_YUV440P \ + || (x)==PIX_FMT_YUV420P9LE \ + || (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P16LE \ || (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV444P16LE \ + || (x)==PIX_FMT_YUV420P9BE \ + || (x)==PIX_FMT_YUV420P10BE \ || (x)==PIX_FMT_YUV420P16BE \ || (x)==PIX_FMT_YUV422P16BE \ || (x)==PIX_FMT_YUV444P16BE \ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index b1009cb4e9..d719721693 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -2611,7 +2611,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi - if (is16BPS(dstFormat)) { + if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, @@ -2688,7 
+2688,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi - if (is16BPS(dstFormat)) { + if (is16BPS(dstFormat) || is9_OR_10BPS(dstFormat)) { yuv2yuvX16inC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |