aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2011-06-16 12:04:24 -0400
committer Ronald Bultje <rbultje@dhcp-172-22-79-135.mtv.corp.google.com> 2011-06-27 18:05:16 -0700
commit dc179ec81902e3c9d327f9e818454f2849308000 (patch)
tree d3a4df3cd24326fbf3aa95005035b7ba9fd28fe4
parent 0d994b2f45c08794899057ee7ca54f48218c0a53 (diff)
download ffmpeg-dc179ec81902e3c9d327f9e818454f2849308000.tar.gz
swscale: split yuv2packedX_altivec into smaller functions.
This will likely lead to a considerable performance boost, since it removes a branch from the inner loop. Part of the Great Evil Plan to simplify swscale.
-rw-r--r-- libswscale/ppc/swscale_altivec.c 14
-rw-r--r-- libswscale/ppc/yuv2rgb_altivec.c 28
-rw-r--r-- libswscale/ppc/yuv2rgb_altivec.h 18
3 files changed, 46 insertions, 14 deletions
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 47fe54c088..7161fe7963 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -414,10 +414,14 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
/* The following list of supported dstFormat values should
* match what's found in the body of ff_yuv2packedX_altivec() */
- if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
- (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
- c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
- c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) {
- c->yuv2packedX = ff_yuv2packedX_altivec;
+ if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
+ switch (c->dstFormat) {
+ case PIX_FMT_ABGR: c->yuv2packedX = ff_yuv2abgr_X_altivec; break;
+ case PIX_FMT_BGRA: c->yuv2packedX = ff_yuv2bgra_X_altivec; break;
+ case PIX_FMT_ARGB: c->yuv2packedX = ff_yuv2argb_X_altivec; break;
+ case PIX_FMT_RGBA: c->yuv2packedX = ff_yuv2rgba_X_altivec; break;
+ case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break;
+ case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break;
}
+ }
}
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 476db22489..73c02e9494 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -626,13 +626,13 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
}
-void
+static av_always_inline void
ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
const int16_t **chrVSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest,
- int dstW, int dstY)
+ int dstW, int dstY, enum PixelFormat target)
{
int i,j;
vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@@ -706,7 +706,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
G = vec_packclp (G0,G1);
B = vec_packclp (B0,B1);
- switch(c->dstFormat) {
+ switch(target) {
case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
@@ -785,7 +785,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
B = vec_packclp (B0,B1);
nout = (vector unsigned char *)scratch;
- switch(c->dstFormat) {
+ switch(target) {
case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
@@ -803,3 +803,23 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
}
}
+
+#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+ const int16_t **lumSrc, int lumFilterSize, \
+ const int16_t *chrFilter, const int16_t **chrUSrc, \
+ const int16_t **chrVSrc, int chrFilterSize, \
+ const int16_t **alpSrc, uint8_t *dest, \
+ int dstW, int dstY) \
+{ \
+ ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
+ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+ alpSrc, dest, dstW, dstY, pixfmt); \
+}
+
+YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR);
+YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA);
+YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB);
+YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA);
+YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
+YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index b54a856905..b809fe13fe 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -24,11 +24,19 @@
#ifndef PPC_YUV2RGB_ALTIVEC_H
#define PPC_YUV2RGB_ALTIVEC_H 1
-void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
- const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrUSrc,
- const int16_t **chrVSrc, int chrFilterSize,
- const int16_t **alpSrc, uint8_t *dest,
+#define YUV2PACKEDX_HEADER(suffix) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+ const int16_t **lumSrc, int lumFilterSize, \
+ const int16_t *chrFilter, const int16_t **chrUSrc, \
+ const int16_t **chrVSrc, int chrFilterSize, \
+ const int16_t **alpSrc, uint8_t *dest, \
int dstW, int dstY);
+YUV2PACKEDX_HEADER(abgr);
+YUV2PACKEDX_HEADER(bgra);
+YUV2PACKEDX_HEADER(argb);
+YUV2PACKEDX_HEADER(rgba);
+YUV2PACKEDX_HEADER(rgb24);
+YUV2PACKEDX_HEADER(bgr24);
+
#endif /* PPC_YUV2RGB_ALTIVEC_H */