diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-02-14 21:59:56 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-14 22:00:12 +0100 |
commit | f11905763cda57ea7d58cb7d522988988c086429 (patch) | |
tree | 932ef8325aaf4fdbad3d89e624c28dc7664f6ed1 | |
parent | e136579ca358093caa6b0a4941717ae7ef1f90b2 (diff) | |
parent | 98fdfa99704f1cfef3d3a26c580b92749b6b64cb (diff) | |
download | ffmpeg-f11905763cda57ea7d58cb7d522988988c086429.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
ppc: reduce overreads when loading 8 pixels in altivec dsp functions
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/ppc/dsputil_altivec.c | 20 |
1 file changed, 10 insertions, 10 deletions
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index f36e394fb2..8b676bc596 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -285,10 +285,10 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in /* Read potentially unaligned pixels into t1 and t2 Since we're reading 16 pixels, and actually only want 8, mask out the last 8 pixels. The 0s don't change the sum. */ - vector unsigned char pix1l = vec_ld( 0, pix1); - vector unsigned char pix1r = vec_ld(15, pix1); - vector unsigned char pix2l = vec_ld( 0, pix2); - vector unsigned char pix2r = vec_ld(15, pix2); + vector unsigned char pix1l = vec_ld(0, pix1); + vector unsigned char pix1r = vec_ld(7, pix1); + vector unsigned char pix2l = vec_ld(0, pix2); + vector unsigned char pix2r = vec_ld(7, pix2); t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear); t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear); @@ -367,10 +367,10 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in /* Read potentially unaligned pixels into t1 and t2 Since we're reading 16 pixels, and actually only want 8, mask out the last 8 pixels. The 0s don't change the sum. */ - vector unsigned char pix1l = vec_ld( 0, pix1); - vector unsigned char pix1r = vec_ld(15, pix1); - vector unsigned char pix2l = vec_ld( 0, pix2); - vector unsigned char pix2r = vec_ld(15, pix2); + vector unsigned char pix1l = vec_ld(0, pix1); + vector unsigned char pix1r = vec_ld(7, pix1); + vector unsigned char pix2l = vec_ld(0, pix2); + vector unsigned char pix2r = vec_ld(7, pix2); t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear); t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear); @@ -489,8 +489,8 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, i // Read potentially unaligned pixels. // We're reading 16 pixels, and actually only want 8, // but we simply ignore the extras. 
- vector unsigned char pixl = vec_ld( 0, pixels); - vector unsigned char pixr = vec_ld(15, pixels); + vector unsigned char pixl = vec_ld(0, pixels); + vector unsigned char pixr = vec_ld(7, pixels); bytes = vec_perm(pixl, pixr, perm); // convert the bytes into shorts |