aboutsummaryrefslogtreecommitdiffstats
path: root/libswscale/ppc/swscale_altivec_template.c
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2011-05-28 15:52:50 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-05-29 02:13:36 +0200
commit: 986f0d86cbdc92f46e5fbba05fb29526b76162be (patch)
tree: 6f1a2fd291a6930528a806c9b68c530f9d22f392 /libswscale/ppc/swscale_altivec_template.c
parent: ea535ed50d1b8d751e2d194a987295ab38daf1a2 (diff)
download: ffmpeg-986f0d86cbdc92f46e5fbba05fb29526b76162be.tar.gz
Commits that could not be pulled earlier due to bugs.
commit 93681fbd5082a3af896b7a730dacdd27a3052406
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Thu May 26 11:32:32 2011 -0400
swscale: fix compile on ppc.

commit e758573a887cfb1155e81499ca54f433127cf24e
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Thu May 26 10:36:47 2011 -0400
swscale: fix compile on x86-32.

commit 0f4eb8b04341081591bf401eaa2c07d6bc3ff52e
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Thu May 26 09:17:52 2011 -0400
swscale: remove VOF/VOFW.

commit b4a224c5e4109cb2cca8bac38628673d685fe763
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Wed May 25 14:30:09 2011 -0400
swscale: split chroma buffers into separate U/V planes.
Preparatory step to implement support for sizes > VOFW.
Diffstat (limited to 'libswscale/ppc/swscale_altivec_template.c')
-rw-r--r--libswscale/ppc/swscale_altivec_template.c30
1 files changed, 16 insertions, 14 deletions
diff --git a/libswscale/ppc/swscale_altivec_template.c b/libswscale/ppc/swscale_altivec_template.c
index c7aa0fd2e6..d142c62e61 100644
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
}
static inline void
-yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
- const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
+yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc,
+ int lumFilterSize, const int16_t *chrFilter,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ int chrFilterSize, uint8_t *dest, uint8_t *uDest,
+ uint8_t *vDest, int dstW, int chrDstW)
{
const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
register int i, j;
@@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
- perm = vec_lvsl(0, chrSrc[j]);
- l1 = vec_ld(0, chrSrc[j]);
- l1_V = vec_ld(VOFW << 1, chrSrc[j]);
+ perm = vec_lvsl(0, chrUSrc[j]);
+ l1 = vec_ld(0, chrUSrc[j]);
+ l1_V = vec_ld(0, chrVSrc[j]);
for (i = 0; i < (chrDstW - 7); i+=8) {
int offset = i << 2;
- vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
- vector signed short l2_V = vec_ld(((i + VOFW) << 1) + 16, chrSrc[j]);
+ vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]);
+ vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]);
vector signed int v1 = vec_ld(offset, u);
vector signed int v2 = vec_ld(offset + 16, u);
vector signed int v1_V = vec_ld(offset, v);
vector signed int v2_V = vec_ld(offset + 16, v);
- vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
- vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+VOFW] ... chrSrc[j][i+2055]
+ vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7]
+ vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i+7]
vector signed int i1 = vec_mule(vChrFilter, ls);
vector signed int i2 = vec_mulo(vChrFilter, ls);
@@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
vector signed int vf1 = vec_mergeh(i1, i2);
- vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+ vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j]
vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
- vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
+ vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j]
vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2);
@@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF
l1_V = l2_V;
}
for ( ; i < chrDstW; i++) {
- u[i] += chrSrc[j][i] * chrFilter[j];
- v[i] += chrSrc[j][i + VOFW] * chrFilter[j];
+ u[i] += chrUSrc[j][i] * chrFilter[j];
+ v[i] += chrVSrc[j][i] * chrFilter[j];
}
}
altivec_packIntArrayToCharArray(u, uDest, chrDstW);