aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libswscale/ppc/swscale_altivec.c2
-rw-r--r--libswscale/swscale.c256
-rw-r--r--libswscale/swscale_internal.h44
3 files changed, 102 insertions, 200 deletions
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index e73750f396..3bf9458296 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -405,7 +405,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
!c->alpPixBuf) {
- c->yuv2yuvX = yuv2yuvX_altivec_real;
+// c->yuv2yuvX = yuv2yuvX_altivec_real;
}
/* The following list of supported dstFormat values should
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index bac2932f65..1d9b965ac9 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -196,141 +196,67 @@ DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
static av_always_inline void
-yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
- int lumFilterSize, const int16_t *chrFilter,
- const int32_t **chrUSrc, const int32_t **chrVSrc,
- int chrFilterSize, const int32_t **alpSrc,
- uint16_t *dest[4], int dstW, int chrDstW,
+yuv2yuvX16_c_template(const int16_t *filter, int filterSize,
+ const int32_t **src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
- //FIXME Optimize (just quickly written not optimized..)
- int i;
- uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
- *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
- int shift = 15 + 16 - output_bits - 1;
-
#define output_pixel(pos, val) \
if (big_endian) { \
AV_WB16(pos, av_clip_uint16(val >> shift)); \
} else { \
AV_WL16(pos, av_clip_uint16(val >> shift)); \
}
+
+ int i;
+ int shift = 15 + 16 - output_bits - 1;
+
for (i = 0; i < dstW; i++) {
int val = 1 << (30-output_bits - 1);
int j;
- for (j = 0; j < lumFilterSize; j++)
- val += (lumSrc[j][i] * lumFilter[j]) >> 1;
+ for (j = 0; j < filterSize; j++)
+ val += (src[j][i] * filter[j]) >> 1;
- output_pixel(&yDest[i], val);
- }
-
- if (uDest) {
- for (i = 0; i < chrDstW; i++) {
- int u = 1 << (30-output_bits - 1);
- int v = 1 << (30-output_bits - 1);
- int j;
-
- for (j = 0; j < chrFilterSize; j++) {
- u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
- v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
- }
-
- output_pixel(&uDest[i], u);
- output_pixel(&vDest[i], v);
- }
- }
-
- if (CONFIG_SWSCALE_ALPHA && aDest) {
- for (i = 0; i < dstW; i++) {
- int val = 1 << (30-output_bits - 1);
- int j;
-
- for (j = 0; j < lumFilterSize; j++)
- val += (alpSrc[j][i] * lumFilter[j]) >> 1;
-
- output_pixel(&aDest[i], val);
- }
+ output_pixel(&dest[i], val);
}
#undef output_pixel
}
-static av_always_inline void
-yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
- int lumFilterSize, const int16_t *chrFilter,
- const int16_t **chrUSrc, const int16_t **chrVSrc,
- int chrFilterSize, const int16_t **alpSrc,
- uint16_t *dest[4], int dstW, int chrDstW,
- int big_endian, int output_bits)
-{
- //FIXME Optimize (just quickly written not optimized..)
- int i;
- uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
- *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
- int shift = 11 + 16 - output_bits;
-
#define output_pixel(pos, val) \
if (big_endian) { \
AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
} else { \
AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
}
+
+static av_always_inline void
+yuv2yuvX10_c_template(const int16_t *filter, int filterSize,
+ const int16_t **src, uint16_t *dest, int dstW,
+ int big_endian, int output_bits)
+{
+ int i;
+ int shift = 11 + 16 - output_bits;
+
for (i = 0; i < dstW; i++) {
int val = 1 << (26-output_bits);
int j;
- for (j = 0; j < lumFilterSize; j++)
- val += lumSrc[j][i] * lumFilter[j];
+ for (j = 0; j < filterSize; j++)
+ val += src[j][i] * filter[j];
- output_pixel(&yDest[i], val);
+ output_pixel(&dest[i], val);
}
+}
- if (uDest) {
- for (i = 0; i < chrDstW; i++) {
- int u = 1 << (26-output_bits);
- int v = 1 << (26-output_bits);
- int j;
-
- for (j = 0; j < chrFilterSize; j++) {
- u += chrUSrc[j][i] * chrFilter[j];
- v += chrVSrc[j][i] * chrFilter[j];
- }
-
- output_pixel(&uDest[i], u);
- output_pixel(&vDest[i], v);
- }
- }
-
- if (CONFIG_SWSCALE_ALPHA && aDest) {
- for (i = 0; i < dstW; i++) {
- int val = 1 << (26-output_bits);
- int j;
-
- for (j = 0; j < lumFilterSize; j++)
- val += alpSrc[j][i] * lumFilter[j];
-
- output_pixel(&aDest[i], val);
- }
- }
#undef output_pixel
-}
#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \
-static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
- const int16_t **_lumSrc, int lumFilterSize, \
- const int16_t *chrFilter, const int16_t **_chrUSrc, \
- const int16_t **_chrVSrc, \
- int chrFilterSize, const int16_t **_alpSrc, \
- uint8_t *_dest[4], int dstW, int chrDstW) \
+static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
+ const int16_t **src, uint16_t *dest, int dstW, \
+ const uint8_t *dither, int offset)\
{ \
- const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \
- **chrUSrc = (const typeX_t **) _chrUSrc, \
- **chrVSrc = (const typeX_t **) _chrVSrc, \
- **alpSrc = (const typeX_t **) _alpSrc; \
- yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \
- chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
- alpSrc, (uint16_t **) _dest, \
- dstW, chrDstW, is_be, bits); \
+ yuv2yuvX_template_fn(filter, filterSize, (const typeX_t **) src, \
+ dest, dstW, is_be, bits); \
}
yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t);
yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t);
@@ -339,51 +265,19 @@ yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t);
yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t);
yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t);
-static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc,
- int chrFilterSize, const int16_t **alpSrc,
- uint8_t *dest[4], int dstW, int chrDstW)
+static void yuv2yuvX_c(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset)
{
- uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
- *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
int i;
- const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
-
- //FIXME Optimize (just quickly written not optimized..)
for (i=0; i<dstW; i++) {
- int val = lumDither[i & 7] << 12;
+ int val = dither[(i + offset) & 7] << 12;
int j;
- for (j=0; j<lumFilterSize; j++)
- val += lumSrc[j][i] * lumFilter[j];
+ for (j=0; j<filterSize; j++)
+ val += src[j][i] * filter[j];
- yDest[i]= av_clip_uint8(val>>19);
+ dest[i]= av_clip_uint8(val>>19);
}
-
- if (uDest)
- for (i=0; i<chrDstW; i++) {
- int u = chrDither[i & 7] << 12;
- int v = chrDither[(i + 3) & 7] << 12;
- int j;
- for (j=0; j<chrFilterSize; j++) {
- u += chrUSrc[j][i] * chrFilter[j];
- v += chrVSrc[j][i] * chrFilter[j];
- }
-
- uDest[i]= av_clip_uint8(u>>19);
- vDest[i]= av_clip_uint8(v>>19);
- }
-
- if (CONFIG_SWSCALE_ALPHA && aDest)
- for (i=0; i<dstW; i++) {
- int val = lumDither[i & 7] << 12;
- int j;
- for (j=0; j<lumFilterSize; j++)
- val += alpSrc[j][i] * lumFilter[j];
-
- aDest[i]= av_clip_uint8(val>>19);
- }
}
static void yuv2yuv1_c(const int16_t *src, uint8_t *dest, int dstW,
@@ -396,30 +290,13 @@ static void yuv2yuv1_c(const int16_t *src, uint8_t *dest, int dstW,
}
}
-static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc, uint8_t *dest[4],
- int dstW, int chrDstW)
+static void yuv2nv12X_chroma_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ uint8_t *dest, int chrDstW)
{
- uint8_t *yDest = dest[0], *uDest = dest[1];
enum PixelFormat dstFormat = c->dstFormat;
- const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
-
- //FIXME Optimize (just quickly written not optimized..)
+ const uint8_t *chrDither = c->chrDither8;
int i;
- for (i=0; i<dstW; i++) {
- int val = lumDither[i & 7] << 12;
- int j;
- for (j=0; j<lumFilterSize; j++)
- val += lumSrc[j][i] * lumFilter[j];
-
- yDest[i]= av_clip_uint8(val>>19);
- }
-
- if (!uDest)
- return;
if (dstFormat == PIX_FMT_NV12)
for (i=0; i<chrDstW; i++) {
@@ -431,8 +308,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
v += chrVSrc[j][i] * chrFilter[j];
}
- uDest[2*i]= av_clip_uint8(u>>19);
- uDest[2*i+1]= av_clip_uint8(v>>19);
+ dest[2*i]= av_clip_uint8(u>>19);
+ dest[2*i+1]= av_clip_uint8(v>>19);
}
else
for (i=0; i<chrDstW; i++) {
@@ -444,8 +321,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
v += chrVSrc[j][i] * chrFilter[j];
}
- uDest[2*i]= av_clip_uint8(v>>19);
- uDest[2*i+1]= av_clip_uint8(u>>19);
+ dest[2*i]= av_clip_uint8(v>>19);
+ dest[2*i+1]= av_clip_uint8(u>>19);
}
}
@@ -2117,26 +1994,29 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
static av_always_inline void
find_c_packed_planar_out_funcs(SwsContext *c,
- yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2yuvX,
+ yuv2planar1_fn *yuv2yuv1, yuv2planarX_fn *yuv2planeX_luma,
+ yuv2planarX_fn *yuv2planeX_chroma, yuv2interleavedX_fn *yuv2nv12X_chroma,
yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
yuv2packedX_fn *yuv2packedX)
{
enum PixelFormat dstFormat = c->dstFormat;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
- *yuv2yuvX = yuv2nv12X_c;
+ *yuv2planeX_luma = yuv2yuvX_c;
+ *yuv2nv12X_chroma = yuv2nv12X_chroma_c;
} else if (is16BPS(dstFormat)) {
- *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
+ *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX16BE_c : yuv2yuvX16LE_c;
} else if (is9_OR_10BPS(dstFormat)) {
if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
- *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
+ *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX9BE_c : yuv2yuvX9LE_c;
} else {
- *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
+ *yuv2planeX_luma = *yuv2planeX_chroma = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
}
} else {
*yuv2yuv1 = yuv2yuv1_c;
- *yuv2yuvX = yuv2yuvX_c;
+ *yuv2planeX_luma = *yuv2planeX_chroma = yuv2yuvX_c;
}
+
if(c->flags & SWS_FULL_CHR_H_INT) {
switch (dstFormat) {
case PIX_FMT_RGBA:
@@ -2396,7 +2276,9 @@ static int swScale(SwsContext *c, const uint8_t* src[],
int lastDstY;
uint32_t *pal=c->pal_yuv;
yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
- yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
+ yuv2planarX_fn yuv2planeX_luma = c->yuv2planeX_luma;
+ yuv2planarX_fn yuv2planeX_chroma = c->yuv2planeX_chroma;
+ yuv2interleavedX_fn yuv2nv12X_chroma = c->yuv2nv12X_chroma;
yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX;
@@ -2548,8 +2430,8 @@ static int swScale(SwsContext *c, const uint8_t* src[],
}
if (dstY >= dstH-2) {
// hmm looks like we can't use MMX here without overwriting this array's tail
- find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
- &yuv2packed1, &yuv2packed2,
+ find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2planeX_luma, &yuv2planeX_chroma,
+ &yuv2nv12X_chroma, &yuv2packed1, &yuv2packed2,
&yuv2packedX);
}
@@ -2564,7 +2446,13 @@ static int swScale(SwsContext *c, const uint8_t* src[],
dest[1] = dest[2] = NULL; //FIXME split functions in lumi / chromi
const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
- if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
+ if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
+ yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
+
+ if (dest[1]){
+ yuv2nv12X_chroma(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
+ }
+ } else if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
yuv2yuv1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
if (dest[1]){
@@ -2575,11 +2463,15 @@ static int swScale(SwsContext *c, const uint8_t* src[],
if (alpBuf && dest[3])
yuv2yuv1(alpBuf, dest[3], dstW, c->lumDither8, 0);
} else { //General YV12
- yuv2yuvX(c, vLumFilter + dstY * vLumFilterSize,
- lumSrcPtr, vLumFilterSize,
- vChrFilter + chrDstY * vChrFilterSize,
- chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
- alpSrcPtr, dest, dstW, chrDstW);
+ yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
+
+ if (dest[1]){
+ yuv2planeX_chroma(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
+ yuv2planeX_chroma(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
+ }
+
+ if (alpBuf && dest[3])
+ yuv2planeX_luma(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
}
} else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
@@ -2633,8 +2525,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
{
enum PixelFormat srcFormat = c->srcFormat;
- find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
- &c->yuv2packed1, &c->yuv2packed2,
+ find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2planeX_luma, &c->yuv2planeX_chroma,
+ &c->yuv2nv12X_chroma, &c->yuv2packed1, &c->yuv2packed2,
&c->yuv2packedX);
c->chrToYV12 = NULL;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 7253c833fa..713045556e 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -76,33 +76,41 @@ typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
/**
- * Write one line of horizontally scaled Y/U/V/A to planar output
+ * Write one line of horizontally scaled data to planar output
* with multi-point vertical scaling between input pixels.
*
- * @param c SWS scaling context
- * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096]
- * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output,
+ * @param filter vertical luma/alpha scaling coefficients, 12bit [0,4096]
+ * @param src scaled luma (Y) or alpha (A) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
- * @param lumFilterSize number of vertical luma/alpha input lines to scale
+ * @param filterSize number of vertical input lines to scale
+ * @param dest pointer to output plane. For >8bit
+ * output, this is in uint16_t
+ * @param dstW width of destination pixels
+ * @param offset Dither offset
+ */
+typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
+
+/**
+ * Write one line of horizontally scaled chroma to interleaved output
+ * with multi-point vertical scaling between input pixels.
+ *
+ * @param c SWS scaling context
* @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096]
* @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output,
* 19-bit for 16bit output (in int32_t)
* @param chrFilterSize number of vertical chroma input lines to scale
- * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output,
- * 19-bit for 16bit output (in int32_t)
- * @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit
+ * @param dest pointer to the output plane. For >8bit
* output, this is in uint16_t
- * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels
- * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
+ * @param dstW width of chroma planes
*/
-typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc, uint8_t *dest[4],
- int dstW, int chrDstW);
+typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ uint8_t *dest, int dstW);
+
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
* output without any additional vertical scaling (or point-scaling). Note
@@ -405,7 +413,9 @@ typedef struct SwsContext {
/* function pointers for swScale() */
yuv2planar1_fn yuv2yuv1;
- yuv2planarX_fn yuv2yuvX;
+ yuv2planarX_fn yuv2planeX_luma;
+ yuv2planarX_fn yuv2planeX_chroma;
+ yuv2interleavedX_fn yuv2nv12X_chroma;
yuv2packed1_fn yuv2packed1;
yuv2packed2_fn yuv2packed2;
yuv2packedX_fn yuv2packedX;