diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-01-10 02:50:54 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-01-10 03:50:41 +0100 |
commit | dd3ca3ea15392da8636c06764e2da31e6ca700f0 (patch) | |
tree | 97d3fc3bdb9463a99728e14d3cd4a0062aa3af19 /libswscale | |
parent | 4805a33043e9356fc344aa53c7df747d41ce6b37 (diff) | |
parent | a67b8c86d06eb5b78a0fe4cb9be4e93b29726db1 (diff) | |
download | ffmpeg-dd3ca3ea15392da8636c06764e2da31e6ca700f0.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
fate: Add tests for more AAC features.
aacps: Add missing newline in error message.
fate: Add tests for vc1/wmapro in ism.
aacdec: Add a fate test for 5.1 channel SBR.
aacdec: Turn off PS for multichannel files that use PCE based configs.
cabac: remove put_cabac_u/ueg from cabac-test.
swscale: RGB4444 and BGR444 input
FATE: add test for xWMA demuxer.
FATE: add test for SMJPEG demuxer and associated IMA ADPCM audio decoder.
mpegaudiodec: optimized iMDCT transform
mpegaudiodec: change imdct window arrangment for better pointer alignment
mpegaudiodec: move imdct and windowing function to mpegaudiodsp
mpegaudiodec: interleave iMDCT buffer to simplify future SIMD implementations
swscale: convert yuy2/uyvy/nv12/nv21ToY/UV from inline asm to yasm.
FATE: test to exercise WTV demuxer.
mjpegdec: K&R formatting cosmetics
swscale: K&R formatting cosmetics for code examples
swscale: K&R reformatting cosmetics for header files
FATE test: cvid-grayscale; ensures that the grayscale Cinepak variant is exercised.
Conflicts:
libavcodec/cabac.c
libavcodec/mjpegdec.c
libavcodec/mpegaudiodec.c
libavcodec/mpegaudiodsp.c
libavcodec/mpegaudiodsp.h
libavcodec/mpegaudiodsp_template.c
libavcodec/x86/Makefile
libavcodec/x86/imdct36_sse.asm
libavcodec/x86/mpegaudiodec_mmx.c
libswscale/swscale-test.c
libswscale/swscale.c
libswscale/swscale_internal.h
libswscale/x86/swscale_template.c
tests/fate/demux.mak
tests/fate/microsoft.mak
tests/fate/video.mak
tests/fate/wma.mak
tests/ref/lavfi/pixfmts_scale
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/Makefile | 3 | ||||
-rw-r--r-- | libswscale/colorspace-test.c | 79 | ||||
-rw-r--r-- | libswscale/ppc/yuv2rgb_altivec.h | 19 | ||||
-rw-r--r-- | libswscale/rgb2rgb.h | 30 | ||||
-rw-r--r-- | libswscale/swscale-test.c | 186 | ||||
-rw-r--r-- | libswscale/swscale.c | 16 | ||||
-rw-r--r-- | libswscale/swscale.h | 5 | ||||
-rw-r--r-- | libswscale/swscale_internal.h | 250 | ||||
-rw-r--r-- | libswscale/utils.c | 8 | ||||
-rw-r--r-- | libswscale/x86/input.asm | 242 | ||||
-rw-r--r-- | libswscale/x86/swscale_mmx.c | 83 | ||||
-rw-r--r-- | libswscale/x86/swscale_template.c | 163 |
12 files changed, 643 insertions, 441 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile index 78d0112c8e..77d896a76b 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -19,7 +19,8 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \ x86/swscale_mmx.o \ x86/yuv2rgb_mmx.o OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o -MMX-OBJS-$(HAVE_YASM) += x86/output.o \ +MMX-OBJS-$(HAVE_YASM) += x86/input.o \ + x86/output.o \ x86/scale.o $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index a5709e482e..89713a8a0c 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -27,19 +27,19 @@ #include "swscale.h" #include "rgb2rgb.h" -#define SIZE 1000 +#define SIZE 1000 #define srcByte 0x55 #define dstByte 0xBB -#define FUNC(s,d,n) {s,d,#n,n} +#define FUNC(s, d, n) { s, d, #n, n } int main(int argc, char **argv) { int i, funcNum; uint8_t *srcBuffer = av_malloc(SIZE); uint8_t *dstBuffer = av_malloc(SIZE); - int failedNum=0; - int passedNum=0; + int failedNum = 0; + int passedNum = 0; if (!srcBuffer || !dstBuffer) return -1; @@ -47,7 +47,7 @@ int main(int argc, char **argv) av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); sws_rgb2rgb_init(); - for(funcNum=0; ; funcNum++) { + for (funcNum = 0; ; funcNum++) { struct func_info_s { int src_bpp; int dst_bpp; @@ -85,67 +85,78 @@ int main(int argc, char **argv) FUNC(0, 0, NULL) }; int width; - int failed=0; - int srcBpp=0; - int dstBpp=0; + int failed = 0; + int srcBpp = 0; + int dstBpp = 0; - if (!func_info[funcNum].func) break; + if (!func_info[funcNum].func) + break; - av_log(NULL, AV_LOG_INFO,"."); + av_log(NULL, AV_LOG_INFO, "."); memset(srcBuffer, srcByte, SIZE); - for(width=63; width>0; width--) { + for (width = 63; width > 0; width--) { int dstOffset; - for(dstOffset=128; dstOffset<196; dstOffset+=4) { + for (dstOffset = 128; dstOffset < 196; dstOffset += 4) { int srcOffset; memset(dstBuffer, dstByte, SIZE); - for(srcOffset=128; srcOffset<196; srcOffset+=4) { - uint8_t *src= srcBuffer+srcOffset; - uint8_t *dst= dstBuffer+dstOffset; - const char *name=NULL; + for (srcOffset = 128; srcOffset < 196; srcOffset += 4) { + uint8_t *src = srcBuffer + srcOffset; + uint8_t *dst = dstBuffer + dstOffset; + const char *name = NULL; - if(failed) break; //don't fill the screen with shit ... + // don't fill the screen with shit ... + if (failed) + break; srcBpp = func_info[funcNum].src_bpp; dstBpp = func_info[funcNum].dst_bpp; name = func_info[funcNum].name; - func_info[funcNum].func(src, dst, width*srcBpp); + func_info[funcNum].func(src, dst, width * srcBpp); - if(!srcBpp) break; + if (!srcBpp) + break; - for(i=0; i<SIZE; i++) { - if(srcBuffer[i]!=srcByte) { - av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n", + for (i = 0; i < SIZE; i++) { + if (srcBuffer[i] != srcByte) { + av_log(NULL, AV_LOG_INFO, + "src damaged at %d w:%d src:%d dst:%d %s\n", i, width, srcOffset, dstOffset, name); - failed=1; + failed = 1; break; } } - for(i=0; i<dstOffset; i++) { - if(dstBuffer[i]!=dstByte) { - av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", + for (i = 0; i < dstOffset; i++) { + if (dstBuffer[i] != dstByte) { + av_log(NULL, AV_LOG_INFO, + "dst damaged at %d w:%d src:%d dst:%d %s\n", i, width, srcOffset, dstOffset, name); - failed=1; + failed = 1; break; } } - for(i=dstOffset + width*dstBpp; i<SIZE; i++) { - if(dstBuffer[i]!=dstByte) { - av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", + for (i = dstOffset + width * dstBpp; i < SIZE; i++) { + if (dstBuffer[i] != dstByte) { + av_log(NULL, AV_LOG_INFO, + "dst damaged at %d w:%d src:%d dst:%d %s\n", i, width, srcOffset, dstOffset, name); - failed=1; + failed = 1; break; } } } } } - if(failed) failedNum++; - else if(srcBpp) passedNum++; + if (failed) + failedNum++; + else if (srcBpp) + passedNum++; } - av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum); + av_log(NULL, AV_LOG_INFO, + "\n%d converters passed, %d converters randomly overwrote memory\n", + passedNum, failedNum); return failedNum; } diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index 7c2a7e547b..8c62c322e7 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -24,13 +24,18 @@ #ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H #define SWSCALE_PPC_YUV2RGB_ALTIVEC_H -#define YUV2PACKEDX_HEADER(suffix) \ -void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \ - const int16_t **lumSrc, int lumFilterSize, \ - const int16_t *chrFilter, const int16_t **chrUSrc, \ - const int16_t **chrVSrc, int chrFilterSize, \ - const int16_t **alpSrc, uint8_t *dest, \ - int dstW, int dstY); +#define YUV2PACKEDX_HEADER(suffix) \ + void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ + const int16_t *lumFilter, \ + const int16_t **lumSrc, \ + int lumFilterSize, \ + const int16_t *chrFilter, \ + const int16_t **chrUSrc, \ + const int16_t **chrVSrc, \ + int chrFilterSize, \ + const int16_t **alpSrc, \ + uint8_t *dest, \ + int dstW, int dstY); YUV2PACKEDX_HEADER(abgr); YUV2PACKEDX_HEADER(bgra); diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index e3edac88d4..a7542cb211 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -36,32 +36,33 @@ extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); -void rgb24to32 (const uint8_t *src, uint8_t *dst, int src_size); -void rgb32to24 (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); + +void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size); +void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb16to24 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb15to24 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); -void bgr8torgb8 (const uint8_t *src, uint8_t *dst, int src_size); +void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); @@ -138,7 +139,6 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint int srcStride1, int srcStride2, int srcStride3, int dstStride); - extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride); diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c index 5dd2e34870..f382ce3d6a 100644 --- a/libswscale/swscale-test.c +++ b/libswscale/swscale-test.c @@ -35,33 +35,32 @@ /* HACK Duplicated from swscale_internal.h. * Should be removed when a cleaner pixel format system exists. */ -#define isGray(x) ( \ - (x)==PIX_FMT_GRAY8 \ - || (x)==PIX_FMT_GRAY16BE \ - || (x)==PIX_FMT_GRAY16LE \ - ) -#define hasChroma(x) (!( \ - isGray(x) \ - || (x)==PIX_FMT_MONOBLACK \ - || (x)==PIX_FMT_MONOWHITE \ - )) -#define isALPHA(x) ( \ - (x)==PIX_FMT_BGR32 \ - || (x)==PIX_FMT_BGR32_1 \ - || (x)==PIX_FMT_RGB32 \ - || (x)==PIX_FMT_RGB32_1 \ - || (x)==PIX_FMT_YUVA420P \ - ) - -static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h) +#define isGray(x) \ + ((x) == PIX_FMT_GRAY8 || \ + (x) == PIX_FMT_Y400A || \ + (x) == PIX_FMT_GRAY16BE || \ + (x) == PIX_FMT_GRAY16LE) +#define hasChroma(x) \ + (!(isGray(x) || \ + (x) == PIX_FMT_MONOBLACK || \ + (x) == PIX_FMT_MONOWHITE)) +#define isALPHA(x) \ + ((x) == PIX_FMT_BGR32 || \ + (x) == PIX_FMT_BGR32_1 || \ + (x) == PIX_FMT_RGB32 || \ + (x) == PIX_FMT_RGB32_1 || \ + (x) == PIX_FMT_YUVA420P) + +static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, + int stride2, int w, int h) { - int x,y; - uint64_t ssd=0; + int x, y; + uint64_t ssd = 0; - for (y=0; y<h; y++) { - for (x=0; x<w; x++) { - int d= src1[x + y*stride1] - src2[x + y*stride2]; - ssd+= d*d; + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + int d = src1[x + y * stride1] - src2[x + y * stride2]; + ssd += d * d; } } return ssd; @@ -86,14 +85,14 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, static int cur_srcW, cur_srcH; static uint8_t *src[4]; static int srcStride[4]; - uint8_t *dst[4] = {0}; - uint8_t *out[4] = {0}; + uint8_t *dst[4] = { 0 }; + uint8_t *out[4] = { 0 }; int dstStride[4]; int i; - uint64_t ssdY, ssdU=0, ssdV=0, ssdA=0; + uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0; struct SwsContext *dstContext = NULL, *outContext = NULL; uint32_t crc = 0; - int res = 0; + int res = 0; if (cur_srcFormat != srcFormat || cur_srcW != srcW || cur_srcH != srcH) { struct SwsContext *srcContext = NULL; @@ -106,11 +105,10 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, for (p = 0; p < 4; p++) { srcStride[p] = FFALIGN(srcStride[p], 16); if (srcStride[p]) - src[p] = av_mallocz(srcStride[p]*srcH+16); + src[p] = av_mallocz(srcStride[p] * srcH + 16); if (srcStride[p] && !src[p]) { perror("Malloc"); res = -1; - goto end; } } @@ -121,19 +119,18 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, av_pix_fmt_descriptors[PIX_FMT_YUVA420P].name, av_pix_fmt_descriptors[srcFormat].name); res = -1; - goto end; } sws_scale(srcContext, ref, refStride, 0, h, src, srcStride); sws_freeContext(srcContext); cur_srcFormat = srcFormat; - cur_srcW = srcW; - cur_srcH = srcH; + cur_srcW = srcW; + cur_srcH = srcH; } av_image_fill_linesizes(dstStride, dstFormat, dstW); - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) { /* Image buffers passed into libswscale can be allocated any way you * prefer, as long as they're aligned enough for the architecture, and * they're freed appropriately (such as using av_free for buffers @@ -142,7 +139,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, * out of bounds. */ dstStride[i] = FFALIGN(dstStride[i], 16); if (dstStride[i]) - dst[i]= av_mallocz(dstStride[i]*dstH+16); + dst[i] = av_mallocz(dstStride[i] * dstH + 16); if (dstStride[i] && !dst[i]) { perror("Malloc"); res = -1; @@ -151,13 +148,13 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, } } - dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL, NULL); + dstContext = sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, + flags, NULL, NULL, NULL); if (!dstContext) { fprintf(stderr, "Failed to get %s ---> %s\n", av_pix_fmt_descriptors[srcFormat].name, av_pix_fmt_descriptors[dstFormat].name); res = -1; - goto end; } @@ -169,9 +166,9 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride); - for (i = 0; i < 4 && dstStride[i]; i++) { - crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], dstStride[i] * dstH); - } + for (i = 0; i < 4 && dstStride[i]; i++) + crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], + dstStride[i] * dstH); if (r && crc == r->crc) { ssdY = r->ssdY; @@ -179,61 +176,60 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, ssdV = r->ssdV; ssdA = r->ssdA; } else { - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) { refStride[i] = FFALIGN(refStride[i], 16); if (refStride[i]) - out[i]= av_mallocz(refStride[i]*h); + out[i] = av_mallocz(refStride[i] * h); if (refStride[i] && !out[i]) { perror("Malloc"); res = -1; - goto end; } } - outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); + outContext = sws_getContext(dstW, dstH, dstFormat, w, h, + PIX_FMT_YUVA420P, SWS_BILINEAR, + NULL, NULL, NULL); if (!outContext) { fprintf(stderr, "Failed to get %s ---> %s\n", av_pix_fmt_descriptors[dstFormat].name, av_pix_fmt_descriptors[PIX_FMT_YUVA420P].name); res = -1; - goto end; } sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride); - ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); + ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); if (hasChroma(srcFormat) && hasChroma(dstFormat)) { //FIXME check that output is really gray - ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1); - ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1); + ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1], + (w + 1) >> 1, (h + 1) >> 1); + ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2], + (w + 1) >> 1, (h + 1) >> 1); } if (isALPHA(srcFormat) && isALPHA(dstFormat)) - ssdA= getSSD(ref[3], out[3], refStride[3], refStride[3], w, h); + ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h); - ssdY/= w*h; - ssdU/= w*h/4; - ssdV/= w*h/4; - ssdA/= w*h; + ssdY /= w * h; + ssdU /= w * h / 4; + ssdV /= w * h / 4; + ssdA /= w * h; sws_freeContext(outContext); - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) if (refStride[i]) av_free(out[i]); - } } - printf(" CRC=%08x SSD=%5"PRId64",%5"PRId64",%5"PRId64",%5"PRId64"\n", + printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n", crc, ssdY, ssdU, ssdV, ssdA); end: - sws_freeContext(dstContext); - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) if (dstStride[i]) av_free(dst[i]); - } return res; } @@ -242,18 +238,18 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, enum PixelFormat srcFormat_in, enum PixelFormat dstFormat_in) { - const int flags[] = { SWS_FAST_BILINEAR, - SWS_BILINEAR, SWS_BICUBIC, - SWS_X , SWS_POINT , SWS_AREA, 0 }; - const int srcW = w; - const int srcH = h; - const int dstW[] = { srcW - srcW/3, srcW, srcW + srcW/3, 0 }; - const int dstH[] = { srcH - srcH/3, srcH, srcH + srcH/3, 0 }; + const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC, + SWS_X, SWS_POINT, SWS_AREA, 0 }; + const int srcW = w; + const int srcH = h; + const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 }; + const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 }; enum PixelFormat srcFormat, dstFormat; for (srcFormat = srcFormat_in != PIX_FMT_NONE ? srcFormat_in : 0; srcFormat < PIX_FMT_NB; srcFormat++) { - if (!sws_isSupportedInput(srcFormat) || !sws_isSupportedOutput(srcFormat)) + if (!sws_isSupportedInput(srcFormat) || + !sws_isSupportedOutput(srcFormat)) continue; for (dstFormat = dstFormat_in != PIX_FMT_NONE ? dstFormat_in : 0; @@ -261,7 +257,8 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, int i, j, k; int res = 0; - if (!sws_isSupportedInput(dstFormat) || !sws_isSupportedOutput(dstFormat)) + if (!sws_isSupportedInput(dstFormat) || + !sws_isSupportedOutput(dstFormat)) continue; printf("%s -> %s\n", @@ -269,14 +266,13 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, av_pix_fmt_descriptors[dstFormat].name); fflush(stdout); - for (k = 0; flags[k] && !res; k++) { + for (k = 0; flags[k] && !res; k++) for (i = 0; dstW[i] && !res; i++) for (j = 0; dstH[j] && !res; j++) res = doTest(ref, refStride, w, h, srcFormat, dstFormat, srcW, srcH, dstW[i], dstH[j], flags[k], NULL); - } if (dstFormat_in != PIX_FMT_NONE) break; } @@ -302,13 +298,14 @@ static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp, int flags; int ret; - ret = sscanf(buf, " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" - " SSD=%"PRId64", %"PRId64", %"PRId64", %"PRId64"\n", - srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, - &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); + ret = sscanf(buf, + " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" + " SSD=%"PRId64 ", %"PRId64 ", %"PRId64 ", %"PRId64 "\n", + srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, + &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); if (ret != 12) { srcStr[0] = dstStr[0] = 0; - ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr); + ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr); } srcFormat = av_get_pix_fmt(srcStr); @@ -342,12 +339,12 @@ int main(int argc, char **argv) { enum PixelFormat srcFormat = PIX_FMT_NONE; enum PixelFormat dstFormat = PIX_FMT_NONE; - uint8_t *rgb_data = av_malloc (W*H*4); - uint8_t *rgb_src[3]= {rgb_data, NULL, NULL}; - int rgb_stride[3]={4*W, 0, 0}; - uint8_t *data = av_malloc (4*W*H); - uint8_t *src[4]= {data, data+W*H, data+W*H*2, data+W*H*3}; - int stride[4]={W, W, W, W}; + uint8_t *rgb_data = av_malloc(W * H * 4); + uint8_t *rgb_src[3] = { rgb_data, NULL, NULL }; + int rgb_stride[3] = { 4 * W, 0, 0 }; + uint8_t *data = av_malloc(4 * W * H); + uint8_t *src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; + int stride[4] = { W, W, W, W }; int x, y; struct SwsContext *sws; AVLFG rand; @@ -357,41 +354,40 @@ int main(int argc, char **argv) if (!rgb_data || !data) return -1; - sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); + sws = sws_getContext(W / 12, H / 12, PIX_FMT_RGB32, W, H, + PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); av_lfg_init(&rand, 1); - for (y=0; y<H; y++) { - for (x=0; x<W*4; x++) { - rgb_data[ x + y*4*W]= av_lfg_get(&rand); - } - } + for (y = 0; y < H; y++) + for (x = 0; x < W * 4; x++) + rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride); sws_freeContext(sws); av_free(rgb_data); for (i = 1; i < argc; i += 2) { - if (argv[i][0] != '-' || i+1 == argc) + if (argv[i][0] != '-' || i + 1 == argc) goto bad_option; if (!strcmp(argv[i], "-ref")) { - FILE *fp = fopen(argv[i+1], "r"); + FILE *fp = fopen(argv[i + 1], "r"); if (!fp) { - fprintf(stderr, "could not open '%s'\n", argv[i+1]); + fprintf(stderr, "could not open '%s'\n", argv[i + 1]); goto error; } res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); fclose(fp); goto end; } else if (!strcmp(argv[i], "-src")) { - srcFormat = av_get_pix_fmt(argv[i+1]); + srcFormat = av_get_pix_fmt(argv[i + 1]); if (srcFormat == PIX_FMT_NONE) { - fprintf(stderr, "invalid pixel format %s\n", argv[i+1]); + fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); return -1; } } else if (!strcmp(argv[i], "-dst")) { - dstFormat = av_get_pix_fmt(argv[i+1]); + dstFormat = av_get_pix_fmt(argv[i + 1]); if (dstFormat == PIX_FMT_NONE) { - fprintf(stderr, "invalid pixel format %s\n", argv[i+1]); + fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); return -1; } } else { diff --git a/libswscale/swscale.c b/libswscale/swscale.c index dca6b073d0..3cb9bfdd27 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1637,12 +1637,16 @@ rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4) rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4) rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4) rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4) static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, @@ -2887,6 +2891,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break; case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break; case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; + case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break; + case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break; case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break; case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break; case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; @@ -2895,6 +2901,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break; + case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break; + case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break; } } else { switch(srcFormat) { @@ -2909,6 +2917,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break; case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break; case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break; + case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break; + case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break; case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break; case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break; case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; @@ -2916,6 +2926,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break; case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break; case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break; + case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break; + case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break; } } @@ -2960,11 +2972,15 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break; case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break; case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break; + case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break; + case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break; case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break; case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break; case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break; case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break; + case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break; + case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break; case PIX_FMT_RGB8 : case PIX_FMT_BGR8 : case PIX_FMT_PAL8 : diff --git a/libswscale/swscale.h b/libswscale/swscale.h index f65d0767c0..fa7100c41a 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -135,7 +135,6 @@ const char *swscale_license(void); */ const int *sws_getCoefficients(int colorspace); - // when used for filters they must have an odd number of elements // coeffs cannot be shared between vectors typedef struct { @@ -235,9 +234,9 @@ struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat * the destination image * @return the height of the output slice */ -int sws_scale(struct SwsContext *c, const uint8_t* const srcSlice[], +int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, - uint8_t* const dst[], const int dstStride[]); + uint8_t *const dst[], const int dstStride[]); /** * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x] diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index a9830eff20..f05925f842 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -32,9 +32,9 @@ #include "libavutil/pixfmt.h" #include "libavutil/pixdesc.h" -#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long +#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long -#define FAST_BGR2YV12 //use 7-bit instead of 15-bit coefficients +#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients #define MAX_FILTER_SIZE 256 @@ -47,21 +47,20 @@ #endif #if ARCH_X86_64 -# define APCK_PTR2 8 +# define APCK_PTR2 8 # define APCK_COEF 16 # define APCK_SIZE 24 #else -# define APCK_PTR2 4 -# define APCK_COEF 8 +# define APCK_PTR2 4 +# define APCK_COEF 8 # define APCK_SIZE 16 #endif struct SwsContext; -typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], +typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, - uint8_t* dst[], int dstStride[]); - + uint8_t *dst[], int dstStride[]); /** * Write one line of horizontally scaled data to planar output @@ -75,8 +74,8 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], * @param dither ordered dither array of type int16_t and size 8 * @param offset Dither offset */ -typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset); +typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset); /** * Write one line of horizontally scaled data to planar output @@ -91,9 +90,9 @@ typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW, * @param dstW width of destination pixels * @param offset Dither offset */ -typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset); +typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset); /** * Write one line of horizontally scaled chroma to interleaved output @@ -110,9 +109,12 @@ typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize, * output, this is in uint16_t * @param dstW width of chroma planes */ -typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize, - const int16_t **chrUSrc, const int16_t **chrVSrc, - uint8_t *dest, int dstW); +typedef void (*yuv2interleavedX_fn)(struct SwsContext *c, + const int16_t *chrFilter, + int chrFilterSize, + const int16_t **chrUSrc, + const int16_t **chrVSrc, + uint8_t *dest, int dstW); /** * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB @@ -143,10 +145,11 @@ typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFil * but can be used to generate comfort noise using dithering * for some output formats. */ -typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], - const int16_t *alpSrc, uint8_t *dest, - int dstW, int uvalpha, int y); +typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc[2], + const int16_t *chrVSrc[2], + const int16_t *alpSrc, uint8_t *dest, + int dstW, int uvalpha, int y); /** * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB * output by doing bilinear scaling between two input lines. @@ -175,10 +178,12 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, * but can be used to generate comfort noise using dithering * for some output formats. */ -typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], - const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], - const int16_t *alpSrc[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y); +typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2], + const int16_t *chrUSrc[2], + const int16_t *chrVSrc[2], + const int16_t *alpSrc[2], + uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y); /** * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB * output by doing multi-point vertical scaling between input pixels. @@ -205,12 +210,13 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], * but can be used to generate comfort noise using dithering * or some output formats. */ -typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, - int dstW, int y); +typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, + const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, + int dstW, int y); /* This struct should be aligned on at least a 32-byte boundary. */ typedef struct SwsContext { @@ -263,12 +269,12 @@ typedef struct SwsContext { int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. - int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. - int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. - int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. - int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. - int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. - int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. + int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. + int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. + int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. + int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. + int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. + int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. //@} uint8_t *formatConvBuffer; @@ -295,10 +301,10 @@ typedef struct SwsContext { int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. - int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. - int hChrFilterSize; ///< Horizontal filter size for chroma pixels. - int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. - int vChrFilterSize; ///< Vertical filter size for chroma pixels. + int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. + int hChrFilterSize; ///< Horizontal filter size for chroma pixels. + int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. + int vChrFilterSize; ///< Vertical filter size for chroma pixels. //@} int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes. @@ -310,11 +316,11 @@ typedef struct SwsContext { int dstY; ///< Last destination vertical line output from last slice. int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... - void * yuvTable; // pointer to the yuv->rgb table start so it can be freed() - uint8_t * table_rV[256]; - uint8_t * table_gU[256]; - int table_gV[256]; - uint8_t * table_bU[256]; + void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() + uint8_t *table_rV[256]; + uint8_t *table_gU[256]; + int table_gV[256]; + uint8_t *table_bU[256]; //Colorspace stuff int contrast, brightness, saturation; // for sws_getColorspaceDetails @@ -366,15 +372,15 @@ typedef struct SwsContext { DECLARE_ALIGNED(8, uint64_t, yOffset); DECLARE_ALIGNED(8, uint64_t, uOffset); DECLARE_ALIGNED(8, uint64_t, vOffset); - int32_t lumMmxFilter[4*MAX_FILTER_SIZE]; - int32_t chrMmxFilter[4*MAX_FILTER_SIZE]; + int32_t lumMmxFilter[4 * MAX_FILTER_SIZE]; + int32_t chrMmxFilter[4 * MAX_FILTER_SIZE]; int dstW; ///< Width of destination luma/alpha planes. DECLARE_ALIGNED(8, uint64_t, esp); DECLARE_ALIGNED(8, uint64_t, vRounder); DECLARE_ALIGNED(8, uint64_t, u_temp); DECLARE_ALIGNED(8, uint64_t, v_temp); DECLARE_ALIGNED(8, uint64_t, y_temp); - int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; + int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. @@ -393,7 +399,7 @@ typedef struct SwsContext { vector signed short CGV; vector signed short OY; vector unsigned short CSHIFT; - vector signed short *vYCoeffsBank, *vCCoeffsBank; + vector signed short *vYCoeffsBank, *vCCoeffsBank; #endif #if ARCH_BFIN @@ -423,21 +429,25 @@ typedef struct SwsContext { yuv2packed2_fn yuv2packed2; yuv2packedX_fn yuv2packedX; + /// Unscaled conversion of luma plane to YV12 for horizontal scaler. void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler. + int width, uint32_t *pal); + /// Unscaled conversion of alpha plane to YV12 for horizontal scaler. void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler. + int width, uint32_t *pal); + /// Unscaled conversion of chroma planes to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler. + int width, uint32_t *pal); /** - * Functions to read planar input, such as planar RGB, and convert - * internally to Y/UV. - */ + * Functions to read planar input, such as planar RGB, and convert + * internally to Y/UV. + */ /** @{ */ void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width); - void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width); + void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], + int width); /** @} */ /** @@ -499,19 +509,20 @@ typedef struct SwsContext { * to simplify creating SIMD code. */ /** @{ */ - void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, - int filterSize); - void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, - int filterSize); + void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, + const uint8_t *src, const int16_t *filter, + const int16_t *filterPos, int filterSize); + void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, + const uint8_t *src, const int16_t *filter, + const int16_t *filterPos, int filterSize); /** @} */ - void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. - void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed. + /// Color range conversion function for luma plane if needed. + void (*lumConvertRange)(int16_t *dst, int width); + /// Color range conversion function for chroma planes if needed. + void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); int needs_hcscale; ///< Set if there are chroma planes to be converted. - } SwsContext; //FIXME check init (where 0) @@ -567,53 +578,54 @@ const char *sws_format_name(enum PixelFormat format); (!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \ av_pix_fmt_descriptors[x].nb_components <= 2) #else -#define isGray(x) ( \ - (x)==PIX_FMT_GRAY8 \ - || (x)==PIX_FMT_GRAY8A \ - || (x)==PIX_FMT_GRAY16BE \ - || (x)==PIX_FMT_GRAY16LE \ - ) +#define isGray(x) \ + ((x) == PIX_FMT_GRAY8 || \ + (x) == PIX_FMT_Y400A || \ + (x) == PIX_FMT_GRAY16BE || \ + (x) == PIX_FMT_GRAY16LE) #endif -#define isRGBinInt(x) ( \ - (x)==PIX_FMT_RGB48BE \ - || (x)==PIX_FMT_RGB48LE \ - || (x)==PIX_FMT_RGBA64BE \ - || (x)==PIX_FMT_RGBA64LE \ - || (x)==PIX_FMT_RGB32 \ - || (x)==PIX_FMT_RGB32_1 \ - || (x)==PIX_FMT_RGB24 \ - || (x)==PIX_FMT_RGB565BE \ - || (x)==PIX_FMT_RGB565LE \ - || (x)==PIX_FMT_RGB555BE \ - || (x)==PIX_FMT_RGB555LE \ - || (x)==PIX_FMT_RGB444BE \ - || (x)==PIX_FMT_RGB444LE \ - || (x)==PIX_FMT_RGB8 \ - || (x)==PIX_FMT_RGB4 \ - || (x)==PIX_FMT_RGB4_BYTE \ - || (x)==PIX_FMT_MONOBLACK \ - || (x)==PIX_FMT_MONOWHITE \ +#define isRGBinInt(x) \ + ( \ + (x)==PIX_FMT_RGB48BE || \ + (x)==PIX_FMT_RGB48LE || \ + (x)==PIX_FMT_RGBA64BE || \ + (x)==PIX_FMT_RGBA64LE || \ + (x)==PIX_FMT_RGB32 || \ + (x)==PIX_FMT_RGB32_1 || \ + (x)==PIX_FMT_RGB24 || \ + (x)==PIX_FMT_RGB565BE || \ + (x)==PIX_FMT_RGB565LE || \ + (x)==PIX_FMT_RGB555BE || \ + (x)==PIX_FMT_RGB555LE || \ + (x)==PIX_FMT_RGB444BE || \ + (x)==PIX_FMT_RGB444LE || \ + (x)==PIX_FMT_RGB8 || \ + (x)==PIX_FMT_RGB4 || \ + (x)==PIX_FMT_RGB4_BYTE || \ + (x)==PIX_FMT_MONOBLACK || \ + (x)==PIX_FMT_MONOWHITE \ ) -#define isBGRinInt(x) ( \ - (x)==PIX_FMT_BGR48BE \ - || (x)==PIX_FMT_BGR48LE \ - || (x)==PIX_FMT_BGRA64BE \ - || (x)==PIX_FMT_BGRA64LE \ - || (x)==PIX_FMT_BGR32 \ - || (x)==PIX_FMT_BGR32_1 \ - || (x)==PIX_FMT_BGR24 \ - || (x)==PIX_FMT_BGR565BE \ - || (x)==PIX_FMT_BGR565LE \ - || (x)==PIX_FMT_BGR555BE \ - || (x)==PIX_FMT_BGR555LE \ - || (x)==PIX_FMT_BGR444BE \ - || (x)==PIX_FMT_BGR444LE \ - || (x)==PIX_FMT_BGR8 \ - || (x)==PIX_FMT_BGR4 \ - || (x)==PIX_FMT_BGR4_BYTE \ - || (x)==PIX_FMT_MONOBLACK \ - || (x)==PIX_FMT_MONOWHITE \ +#define isBGRinInt(x) \ + ( \ + (x)==PIX_FMT_BGR48BE || \ + (x)==PIX_FMT_BGR48LE || \ + (x)==PIX_FMT_BGRA64BE || \ + (x)==PIX_FMT_BGRA64LE || \ + (x)==PIX_FMT_BGR32 || \ + (x)==PIX_FMT_BGR32_1 || \ + (x)==PIX_FMT_BGR24 || \ + (x)==PIX_FMT_BGR565BE || \ + (x)==PIX_FMT_BGR565LE || \ + (x)==PIX_FMT_BGR555BE || \ + (x)==PIX_FMT_BGR555LE || \ + (x)==PIX_FMT_BGR444BE || \ + (x)==PIX_FMT_BGR444LE || \ + (x)==PIX_FMT_BGR8 || \ + (x)==PIX_FMT_BGR4 || \ + (x)==PIX_FMT_BGR4_BYTE|| \ + (x)==PIX_FMT_MONOBLACK|| \ + (x)==PIX_FMT_MONOWHITE \ ) #define isRGBinBytes(x) ( \ @@ -635,10 +647,11 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_BGR24 \ ) -#define isAnyRGB(x) ( \ - isRGBinInt(x) \ - || isBGRinInt(x) \ - || (x)==PIX_FMT_GBR24P \ +#define isAnyRGB(x) \ + ( \ + isRGBinInt(x) || \ + isBGRinInt(x) || \ + (x)==PIX_FMT_GBR24P \ ) #define isALPHA(x) \ @@ -655,15 +668,14 @@ const char *sws_format_name(enum PixelFormat format); || isBGRinInt(x) \ ) #else -#define isPacked(x) (\ - (av_pix_fmt_descriptors[x].nb_components >= 2 && \ - !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \ - (x) == PIX_FMT_PAL8\ - ) +#define isPacked(x) \ + ((av_pix_fmt_descriptors[x].nb_components >= 2 && \ + !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \ + (x) == PIX_FMT_PAL8) #endif #define isPlanar(x) \ - (av_pix_fmt_descriptors[x].nb_components >= 2 && \ + (av_pix_fmt_descriptors[x].nb_components >= 2 && \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) #define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A) diff --git a/libswscale/utils.c b/libswscale/utils.c index 7d87a13617..1bbe58e58b 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -129,10 +129,10 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_YUV422P16BE] = { 1 , 1 }, [PIX_FMT_YUV444P16LE] = { 1 , 1 }, [PIX_FMT_YUV444P16BE] = { 1 , 1 }, - [PIX_FMT_RGB444LE] = { 0 , 1 }, - [PIX_FMT_RGB444BE] = { 0 , 1 }, - [PIX_FMT_BGR444LE] = { 0 , 1 }, - [PIX_FMT_BGR444BE] = { 0 , 1 }, + [PIX_FMT_RGB444LE] = { 1 , 1 }, + [PIX_FMT_RGB444BE] = { 1 , 1 }, + [PIX_FMT_BGR444LE] = { 1 , 1 }, + [PIX_FMT_BGR444BE] = { 1 , 1 }, [PIX_FMT_Y400A] = { 1 , 0 }, [PIX_FMT_BGR48BE] = { 1 , 1 }, [PIX_FMT_BGR48LE] = { 1 , 1 }, diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm new file mode 100644 index 0000000000..af53dab7d6 --- /dev/null +++ b/libswscale/x86/input.asm @@ -0,0 +1,242 @@ +;****************************************************************************** +;* x86-optimized input routines; does shuffling of packed +;* YUV formats into individual planes, and converts RGB +;* into YUV planes also. +;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA + +SECTION .text + +;----------------------------------------------------------------------------- +; YUYV/UYVY/NV12/NV21 packed pixel shuffling. +; +; void <fmt>ToY_<opt>(uint8_t *dst, const uint8_t *src, int w); +; and +; void <fmt>toUV_<opt>(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, +; const uint8_t *unused, int w); +;----------------------------------------------------------------------------- + +; %1 = a (aligned) or u (unaligned) +; %2 = yuyv or uyvy +%macro LOOP_YUYV_TO_Y 2 +.loop_%1: + mov%1 m0, [srcq+wq*2] ; (byte) { Y0, U0, Y1, V0, ... } + mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } +%ifidn %2, yuyv + pand m0, m2 ; (word) { Y0, Y1, ..., Y7 } + pand m1, m2 ; (word) { Y8, Y9, ..., Y15 } +%else ; uyvy + psrlw m0, 8 ; (word) { Y0, Y1, ..., Y7 } + psrlw m1, 8 ; (word) { Y8, Y9, ..., Y15 } +%endif ; yuyv/uyvy + packuswb m0, m1 ; (byte) { Y0, ..., Y15 } + mova [dstq+wq], m0 + add wq, mmsize + jl .loop_%1 + REP_RET +%endmacro + +; %1 = nr. of XMM registers +; %2 = yuyv or uyvy +; %3 = if specified, it means that unaligned and aligned code in loop +; will be the same (i.e. YUYV+AVX), and thus we don't need to +; split the loop in an aligned and unaligned case +%macro YUYV_TO_Y_FN 2-3 +cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w +%ifdef ARCH_X86_64 + movsxd wq, wd +%endif + add dstq, wq +%if mmsize == 16 + test srcq, 15 +%endif + lea srcq, [srcq+wq*2] +%ifidn %2, yuyv + pcmpeqb m2, m2 ; (byte) { 0xff } x 16 + psrlw m2, 8 ; (word) { 0x00ff } x 8 +%endif ; yuyv +%if mmsize == 16 + jnz .loop_u_start + neg wq + LOOP_YUYV_TO_Y a, %2 +.loop_u_start: + neg wq + LOOP_YUYV_TO_Y u, %2 +%else ; mmsize == 8 + neg wq + LOOP_YUYV_TO_Y a, %2 +%endif ; mmsize == 8/16 +%endmacro + +; %1 = a (aligned) or u (unaligned) +; %2 = yuyv or uyvy +%macro LOOP_YUYV_TO_UV 2 +.loop_%1: +%ifidn %2, yuyv + mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } + mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } + psrlw m0, 8 ; (word) { U0, V0, ..., U3, V3 } + psrlw m1, 8 ; (word) { U4, V4, ..., U7, V7 } +%else ; uyvy +%if cpuflag(avx) + vpand m0, m2, [srcq+wq*4] ; (word) { U0, V0, ..., U3, V3 } + vpand m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 } +%else + mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } + mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } + pand m0, m2 ; (word) { U0, V0, ..., U3, V3 } + pand m1, m2 ; (word) { U4, V4, ..., U7, V7 } +%endif +%endif ; yuyv/uyvy + packuswb m0, m1 ; (byte) { U0, V0, ..., U7, V7 } + pand m1, m0, m2 ; (word) { U0, U1, ..., U7 } + psrlw m0, 8 ; (word) { V0, V1, ..., V7 } +%if mmsize == 16 + packuswb m1, m0 ; (byte) { U0, ... U7, V1, ... V7 } + movh [dstUq+wq], m1 + movhps [dstVq+wq], m1 +%else ; mmsize == 8 + packuswb m1, m1 ; (byte) { U0, ... U3 } + packuswb m0, m0 ; (byte) { V0, ... V3 } + movh [dstUq+wq], m1 + movh [dstVq+wq], m0 +%endif ; mmsize == 8/16 + add wq, mmsize / 2 + jl .loop_%1 + REP_RET +%endmacro + +; %1 = nr. of XMM registers +; %2 = yuyv or uyvy +; %3 = if specified, it means that unaligned and aligned code in loop +; will be the same (i.e. UYVY+AVX), and thus we don't need to +; split the loop in an aligned and unaligned case +%macro YUYV_TO_UV_FN 2-3 +cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w +%ifdef ARCH_X86_64 + movsxd wq, r5m +%else ; x86-32 + mov wq, r5m +%endif + add dstUq, wq + add dstVq, wq +%if mmsize == 16 && %0 == 2 + test srcq, 15 +%endif + lea srcq, [srcq+wq*4] + pcmpeqb m2, m2 ; (byte) { 0xff } x 16 + psrlw m2, 8 ; (word) { 0x00ff } x 8 + ; NOTE: if uyvy+avx, u/a are identical +%if mmsize == 16 && %0 == 2 + jnz .loop_u_start + neg wq + LOOP_YUYV_TO_UV a, %2 +.loop_u_start: + neg wq + LOOP_YUYV_TO_UV u, %2 +%else ; mmsize == 8 + neg wq + LOOP_YUYV_TO_UV a, %2 +%endif ; mmsize == 8/16 +%endmacro + +; %1 = a (aligned) or u (unaligned) +; %2 = nv12 or nv21 +%macro LOOP_NVXX_TO_UV 2 +.loop_%1: + mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... } + mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... } + pand m2, m0, m5 ; (word) { U0, U1, ..., U7 } + pand m3, m1, m5 ; (word) { U8, U9, ..., U15 } + psrlw m0, 8 ; (word) { V0, V1, ..., V7 } + psrlw m1, 8 ; (word) { V8, V9, ..., V15 } + packuswb m2, m3 ; (byte) { U0, ..., U15 } + packuswb m0, m1 ; (byte) { V0, ..., V15 } +%ifidn %2, nv12 + mova [dstUq+wq], m2 + mova [dstVq+wq], m0 +%else ; nv21 + mova [dstVq+wq], m2 + mova [dstUq+wq], m0 +%endif ; nv12/21 + add wq, mmsize + jl .loop_%1 + REP_RET +%endmacro + +; %1 = nr. of XMM registers +; %2 = nv12 or nv21 +%macro NVXX_TO_UV_FN 2 +cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w +%ifdef ARCH_X86_64 + movsxd wq, r5m +%else ; x86-32 + mov wq, r5m +%endif + add dstUq, wq + add dstVq, wq +%if mmsize == 16 + test srcq, 15 +%endif + lea srcq, [srcq+wq*2] + pcmpeqb m5, m5 ; (byte) { 0xff } x 16 + psrlw m5, 8 ; (word) { 0x00ff } x 8 +%if mmsize == 16 + jnz .loop_u_start + neg wq + LOOP_NVXX_TO_UV a, %2 +.loop_u_start: + neg wq + LOOP_NVXX_TO_UV u, %2 +%else ; mmsize == 8 + neg wq + LOOP_NVXX_TO_UV a, %2 +%endif ; mmsize == 8/16 +%endmacro + +%ifdef ARCH_X86_32 +INIT_MMX mmx +YUYV_TO_Y_FN 0, yuyv +YUYV_TO_Y_FN 0, uyvy +YUYV_TO_UV_FN 0, yuyv +YUYV_TO_UV_FN 0, uyvy +NVXX_TO_UV_FN 0, nv12 +NVXX_TO_UV_FN 0, nv21 +%endif + +INIT_XMM sse2 +YUYV_TO_Y_FN 3, yuyv +YUYV_TO_Y_FN 2, uyvy +YUYV_TO_UV_FN 3, yuyv +YUYV_TO_UV_FN 3, uyvy +NVXX_TO_UV_FN 5, nv12 +NVXX_TO_UV_FN 5, nv21 + +INIT_XMM avx +; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but +; that's not faster in practice +YUYV_TO_UV_FN 3, yuyv +YUYV_TO_UV_FN 3, uyvy, 1 +NVXX_TO_UV_FN 5, nv12 +NVXX_TO_UV_FN 5, nv21 diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 7cac5d80fd..66c4f69394 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -307,6 +307,26 @@ VSCALE_FUNCS(sse2, sse2); VSCALE_FUNC(16, sse4); VSCALE_FUNCS(avx, avx); +#define INPUT_UV_FUNC(fmt, opt) \ +extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src, const uint8_t *unused1, \ + int w, uint32_t *unused2) +#define INPUT_FUNC(fmt, opt) \ +extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ + int w, uint32_t *unused); \ + INPUT_UV_FUNC(fmt, opt) +#define INPUT_FUNCS(opt) \ + INPUT_FUNC(uyvy, opt); \ + INPUT_FUNC(yuyv, opt); \ + INPUT_UV_FUNC(nv12, opt); \ + INPUT_UV_FUNC(nv21, opt) + +#if ARCH_X86_32 +INPUT_FUNCS(mmx); +#endif +INPUT_FUNCS(sse2); +INPUT_FUNCS(avx); + void ff_sws_init_swScale_mmx(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); @@ -366,6 +386,30 @@ switch(c->dstBpc){ \ ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2); + + switch (c->srcFormat) { + case PIX_FMT_Y400A: + c->lumToYV12 = ff_yuyvToY_mmx; + if (c->alpPixBuf) + c->alpToYV12 = ff_uyvyToY_mmx; + break; + case PIX_FMT_YUYV422: + c->lumToYV12 = ff_yuyvToY_mmx; + c->chrToYV12 = ff_yuyvToUV_mmx; + break; + case PIX_FMT_UYVY422: + c->lumToYV12 = ff_uyvyToY_mmx; + c->chrToYV12 = ff_uyvyToUV_mmx; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_mmx; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_mmx; + break; + default: + break; + } } if (cpu_flags & AV_CPU_FLAG_MMX2) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2,); @@ -384,6 +428,28 @@ switch(c->dstBpc){ \ ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2,); ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); + + switch (c->srcFormat) { + case PIX_FMT_Y400A: + c->lumToYV12 = ff_yuyvToY_sse2; + if (c->alpPixBuf) + c->alpToYV12 = ff_uyvyToY_sse2; + break; + case PIX_FMT_YUYV422: + c->lumToYV12 = ff_yuyvToY_sse2; + c->chrToYV12 = ff_yuyvToUV_sse2; + break; + case PIX_FMT_UYVY422: + c->lumToYV12 = ff_uyvyToY_sse2; + c->chrToYV12 = ff_uyvyToUV_sse2; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_sse2; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_sse2; + break; + } } if (cpu_flags & AV_CPU_FLAG_SSSE3) { ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); @@ -402,6 +468,23 @@ switch(c->dstBpc){ \ if (cpu_flags & AV_CPU_FLAG_AVX) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); + + switch (c->srcFormat) { + case PIX_FMT_YUYV422: + c->chrToYV12 = ff_yuyvToUV_avx; + break; + case PIX_FMT_UYVY422: + c->chrToYV12 = ff_uyvyToUV_avx; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_avx; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_avx; + break; + default: + break; + } } #endif } diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index bb351c2394..79c63b7d47 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1435,147 +1435,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, } } -#if !COMPILE_TEMPLATE_MMX2 -//FIXME yuy2* can read up to 7 samples too much - -static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, - int width, uint32_t *unused) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm2 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "pand %%mm2, %%mm0 \n\t" - "pand %%mm2, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) - : "%"REG_a - ); -} - -static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",4), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm1, %%mm1 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "movd %%mm1, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); - assert(src1 == src2); -} - -/* This is almost identical to the previous, end exists only because - * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, - int width, uint32_t *unused) -{ - __asm__ volatile( - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) - : "%"REG_a - ); -} - -static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",4), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm1, %%mm1 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "movd %%mm1, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); - assert(src1 == src2); -} - -static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, - const uint8_t *src, int width) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "movq %%mm2, (%3, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width) - : "%"REG_a - ); -} - -static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - RENAME(nvXXtoUV)(dstU, dstV, src1, width); -} - -static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - RENAME(nvXXtoUV)(dstV, dstU, src1, width); -} -#endif /* !COMPILE_TEMPLATE_MMX2 */ - static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat) { @@ -1927,15 +1786,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) #endif /* COMPILE_TEMPLATE_MMX2 */ } -#if !COMPILE_TEMPLATE_MMX2 - switch(srcFormat) { - case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break; - case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; - case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; - case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; - default: break; - } -#endif /* !COMPILE_TEMPLATE_MMX2 */ if (!c->chrSrcHSubSample) { switch(srcFormat) { case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break; @@ -1945,21 +1795,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } switch (srcFormat) { -#if !COMPILE_TEMPLATE_MMX2 - case PIX_FMT_YUYV422 : - case PIX_FMT_Y400A : c->lumToYV12 = RENAME(yuy2ToY); break; - case PIX_FMT_UYVY422 : c->lumToYV12 = RENAME(uyvyToY); break; -#endif /* !COMPILE_TEMPLATE_MMX2 */ case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; default: break; } -#if !COMPILE_TEMPLATE_MMX2 - if (c->alpPixBuf) { - switch (srcFormat) { - case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break; - default: break; - } - } -#endif /* !COMPILE_TEMPLATE_MMX2 */ } |