about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Alan Curry <pacman@world.std.com> 2006-02-08 08:20:40 +0000
committer: Diego Biurrun <diego@biurrun.de> 2006-02-08 08:20:40 +0000
commit: 5edb653bca2cfc6792d0185ef4298e068375fb5a (patch)
tree: 1f051ac6029be683e747d404c3d7949af68a0a81
parent: be9d060d0c4f7e548bdb6ce96789b22bfd09a704 (diff)
download: ffmpeg-5edb653bca2cfc6792d0185ef4298e068375fb5a.tar.gz
AltiVec operations need to have memory aligned on 16-byte boundaries.
Patch by Alan Curry, pacman at world dot std dot com.
Originally committed as revision 17559 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
-rw-r--r-- postproc/swscale.c 8
-rw-r--r-- postproc/yuv2rgb_altivec.c 7
2 files changed, 10 insertions, 5 deletions
diff --git a/postproc/swscale.c b/postproc/swscale.c
index fbdedaecd3..2b0d585b1a 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -1166,7 +1166,8 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
}
// Note the +1 is for the MMXscaler which reads over the end
- *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
+ /* align at 16 for AltiVec (needed by hScale_altivec_real) */
+ *outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
/* Normalize & Store in outFilter */
@@ -2132,10 +2133,11 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
+ /* align at 16 bytes for AltiVec */
for(i=0; i<c->vLumBufSize; i++)
- c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
+ c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(16, 4000);
for(i=0; i<c->vChrBufSize; i++)
- c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
+ c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(16, 8000);
//try to avoid drawing green stuff between the right end and the stride end
for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
diff --git a/postproc/yuv2rgb_altivec.c b/postproc/yuv2rgb_altivec.c
index 16acc42564..69b5b302da 100644
--- a/postproc/yuv2rgb_altivec.c
+++ b/postproc/yuv2rgb_altivec.c
@@ -68,6 +68,9 @@
#include <inttypes.h>
#include <assert.h>
#include "config.h"
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
@@ -788,8 +791,8 @@ altivec_yuv2packedX (SwsContext *c,
vector signed short *YCoeffs, *CCoeffs;
- vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*c->dstH);
- vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*c->dstH);
+ vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
+ vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
for (i=0;i<lumFilterSize*c->dstH;i++) {
tmp = c->vLumFilter[i];