about | summary | refs | log | tree | commit | diff | stats
path: root/libswscale/x86
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2011-06-29 04:08:31 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-06-29 05:23:12 +0200
commit: bb9d5171a7352205ac9f09c970e24938fab57165 (patch)
tree: 2abd874837de6e7cc0f21d1f84e13555b2eeb755 /libswscale/x86
parent: dbe5f0172b4f123b15bc8ada82dd17b13c4bbbd7 (diff)
parent: 4578435f35888c95b12a53a12cdab612ac3fef04 (diff)
download: ffmpeg-bb9d5171a7352205ac9f09c970e24938fab57165.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: (21 commits)
  swscale: Add Doxygen for hyscale_fast/hScale.
  fate: enable lavfi-pixmt tests on big endian systems
  PPC: swscale: disable altivec functions for unsupported formats
  fate: merge identical pixdesc_be/le tests
  swscale: Add Doxygen for yuv2planar*/yuv2packed* functions.
  build: call texi2pod.pl with full path instead of symlink
  build: include sub-makefiles using full path instead of symlinks
  swscale: update big endian reference values after dff5a835.
  wavpack: skip blocks with no samples
  cosmetics: remove outdated comment that is no longer true
  build: replace some addprefix/addsuffix with substitution refs
  avutil: Remove unused arbitrary precision integer code.
  configure: Drop check for availability of ten assembler operands.
  aacenc: Save channel configuration for later use.
  aacenc: Fix codebook trellising for zeroed bands.
  swscale: change prototypes of scaled YUV output functions.
  swscale: re-add support for non-native endianness.
  swscale: disentangle yuv2rgbX_c_full() into small functions.
  swscale: split yuv2packed[12X]_c() remainders into small functions.
  swscale: split yuv2packedX_altivec in smaller functions.
  ...

Conflicts:
  Makefile
  configure
  libavcodec/x86/dsputil_mmx.c
  libavfilter/Makefile
  libavformat/Makefile
  libavutil/integer.c
  libavutil/integer.h
  libswscale/swscale.c
  libswscale/swscale_internal.h
  libswscale/x86/swscale_template.c
  tests/ref/lavfi/pixdesc_le
  tests/ref/lavfi/pixfmts_scale

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86')
-rw-r--r-- libswscale/x86/swscale_template.c | 159
1 files changed, 83 insertions, 76 deletions
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index fdf82b2d06..ae0d394078 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -75,11 +75,13 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc,
int chrFilterSize, const int16_t **alpSrc,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
- uint8_t *aDest, int dstW, int chrDstW,
+ uint8_t *dest[4], int dstW, int chrDstW,
const uint8_t *lumDither, const uint8_t *chrDither)
{
int i;
+ uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+ *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+
if (uDest) {
x86_reg uv_off = c->uv_off;
for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
@@ -92,7 +94,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
}
- YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
+ YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
}
#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
@@ -160,11 +162,13 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc,
int chrFilterSize, const int16_t **alpSrc,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
- uint8_t *aDest, int dstW, int chrDstW,
+ uint8_t *dest[4], int dstW, int chrDstW,
const uint8_t *lumDither, const uint8_t *chrDither)
{
int i;
+ uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+ *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+
if (uDest) {
x86_reg uv_off = c->uv_off;
for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
@@ -177,20 +181,21 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
}
- YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
+ YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
}
static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
const int16_t *chrUSrc, const int16_t *chrVSrc,
const int16_t *alpSrc,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
- uint8_t *aDest, int dstW, int chrDstW,
+ uint8_t *dst[4], int dstW, int chrDstW,
const uint8_t *lumDither, const uint8_t *chrDither)
{
int p= 4;
- const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
- uint8_t *dst[4]= { aDest, dest, uDest, vDest };
- x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+ const int16_t *src[4]= {
+ lumSrc + dstW, chrUSrc + chrDstW,
+ chrVSrc + chrDstW, alpSrc + dstW
+ };
+ x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
while (p--) {
if (dst[p]) {
@@ -217,14 +222,15 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
const int16_t *chrUSrc, const int16_t *chrVSrc,
const int16_t *alpSrc,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
- uint8_t *aDest, int dstW, int chrDstW,
+ uint8_t *dst[4], int dstW, int chrDstW,
const uint8_t *lumDither, const uint8_t *chrDither)
{
int p= 4;
- const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
- uint8_t *dst[4]= { aDest, dest, uDest, vDest };
- x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+ const int16_t *src[4]= {
+ lumSrc + dstW, chrUSrc + chrDstW,
+ chrVSrc + chrDstW, alpSrc + dstW
+ };
+ x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
while (p--) {
if (dst[p]) {
@@ -981,14 +987,16 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
/**
* vertical bilinear scale YV12 to RGB
*/
-static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *buf1, const uint16_t *ubuf0,
- const uint16_t *ubuf1, const uint16_t *vbuf0,
- const uint16_t *vbuf1, const uint16_t *abuf0,
- const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf[2], uint8_t *dest,
int dstW, int yalpha, int uvalpha, int y)
{
+ const int16_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+ const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
#if ARCH_X86_64
__asm__ volatile(
YSCALEYUV2RGB(%%r8, %5)
@@ -1043,13 +1051,14 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
}
}
-static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *buf1, const uint16_t *ubuf0,
- const uint16_t *ubuf1, const uint16_t *vbuf0,
- const uint16_t *vbuf1, const uint16_t *abuf0,
- const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf[2], uint8_t *dest,
int dstW, int yalpha, int uvalpha, int y)
{
+ const int16_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1065,13 +1074,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
);
}
-static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *buf1, const uint16_t *ubuf0,
- const uint16_t *ubuf1, const uint16_t *vbuf0,
- const uint16_t *vbuf1, const uint16_t *abuf0,
- const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf[2], uint8_t *dest,
int dstW, int yalpha, int uvalpha, int y)
{
+ const int16_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1093,13 +1103,14 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
);
}
-static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *buf1, const uint16_t *ubuf0,
- const uint16_t *ubuf1, const uint16_t *vbuf0,
- const uint16_t *vbuf1, const uint16_t *abuf0,
- const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf[2], uint8_t *dest,
int dstW, int yalpha, int uvalpha, int y)
{
+ const int16_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1161,13 +1172,14 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
-static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *buf1, const uint16_t *ubuf0,
- const uint16_t *ubuf1, const uint16_t *vbuf0,
- const uint16_t *vbuf1, const uint16_t *abuf0,
- const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf[2], uint8_t *dest,
int dstW, int yalpha, int uvalpha, int y)
{
+ const int16_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
@@ -1300,14 +1312,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
/**
* YV12 to RGB without scaling or interpolating
*/
-static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *ubuf0, const uint16_t *ubuf1,
- const uint16_t *vbuf0, const uint16_t *vbuf1,
- const uint16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, enum PixelFormat dstFormat,
- int flags, int y)
+static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *bguf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int uvalpha, int y)
{
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
@@ -1368,14 +1379,13 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
}
}
-static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *ubuf0, const uint16_t *ubuf1,
- const uint16_t *vbuf0, const uint16_t *vbuf1,
- const uint16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, enum PixelFormat dstFormat,
- int flags, int y)
+static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *bguf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int uvalpha, int y)
{
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
@@ -1406,14 +1416,13 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
}
}
-static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *ubuf0, const uint16_t *ubuf1,
- const uint16_t *vbuf0, const uint16_t *vbuf1,
- const uint16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, enum PixelFormat dstFormat,
- int flags, int y)
+static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *bguf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int uvalpha, int y)
{
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
@@ -1456,14 +1465,13 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
}
}
-static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *ubuf0, const uint16_t *ubuf1,
- const uint16_t *vbuf0, const uint16_t *vbuf1,
- const uint16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, enum PixelFormat dstFormat,
- int flags, int y)
+static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *bguf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int uvalpha, int y)
{
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(
@@ -1543,14 +1551,13 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"psraw $7, %%mm7 \n\t"
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
-static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
- const uint16_t *ubuf0, const uint16_t *ubuf1,
- const uint16_t *vbuf0, const uint16_t *vbuf1,
- const uint16_t *abuf0, uint8_t *dest,
- int dstW, int uvalpha, enum PixelFormat dstFormat,
- int flags, int y)
+static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *bguf[2],
+ const int16_t *abuf0, uint8_t *dest,
+ int dstW, int uvalpha, int y)
{
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+ const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+ const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
__asm__ volatile(