diff options
author | Rong Yan <rongyan236@gmail.com> | 2014-11-11 06:03:01 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-11-11 11:34:38 +0100 |
commit | bb38cb14ccf4fb2b4f7ed867c2904c7a0c9c9ff4 (patch) | |
tree | eee95e53a7fe728df16e11390a7a2cf8611b88df /libavcodec/ppc | |
parent | a83af3fc33a6d858138c5ff772a01b220782eb95 (diff) | |
download | ffmpeg-bb38cb14ccf4fb2b4f7ed867c2904c7a0c9c9ff4.tar.gz |
libavcodec/ppc/me_cmp.c : factorize little and big endian code
add marcos GET_PERM() LOAD_PIX() for POWER LE
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/ppc')
-rw-r--r-- | libavcodec/ppc/me_cmp.c | 406 |
1 files changed, 55 insertions, 351 deletions
diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c index 8ff8193d6d..d99fcd4703 100644 --- a/libavcodec/ppc/me_cmp.c +++ b/libavcodec/ppc/me_cmp.c @@ -35,64 +35,43 @@ #include "libavcodec/me_cmp.h" #if HAVE_ALTIVEC -#if HAVE_VSX -static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int i, s = 0; - const vector unsigned char zero = - (const vector unsigned char) vec_splat_u8(0); - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); - vector signed int sumdiffs; - - for (i = 0; i < h; i++) { - /* Read unaligned pixels into our vectors. The vectors are as follows: - * pix1v: pix1[0] - pix1[15] - * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */ - vector unsigned char pix1v = vec_vsx_ld(0, pix1); - vector unsigned char pix2v = vec_vsx_ld(0, pix2); - vector unsigned char pix2iv = vec_vsx_ld(1, pix2); - - /* Calculate the average vector. */ - vector unsigned char avgv = vec_avg(pix2v, pix2iv); - - /* Calculate a sum of abs differences vector. */ - vector unsigned char t5 = vec_sub(vec_max(pix1v, avgv), - vec_min(pix1v, avgv)); - /* Add each 4 pixel group together and put 4 results into sad. */ - sad = vec_sum4s(t5, sad); - - pix1 += line_size; - pix2 += line_size; - } - /* Sum up the four partial sums, and put the result into s. */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); - sumdiffs = vec_splat(sumdiffs, 3); - vec_vsx_st(sumdiffs, 0, &s); - return s; +#if HAVE_BIGENDIAN +#define GET_PERM(per1, per2, pix) {\ + per1 = vec_lvsl(0, pix);\ + per2 = vec_add(per1, vec_splat_u8(1));\ +} +#define LOAD_PIX(v, iv, pix, per1, per2) {\ + vector unsigned char pix2l = vec_ld(0, pix);\ + vector unsigned char pix2r = vec_ld(16, pix);\ + v = vec_perm(pix2l, pix2r, per1);\ + iv = vec_perm(pix2l, pix2r, per2);\ } #else +#define GET_PERM(per1, per2, pix) {} +#define LOAD_PIX(v, iv, pix, per1, per2) {\ + v = vec_vsx_ld(0, pix);\ + iv = vec_vsx_ld(1, pix);\ +} +#endif static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { - int i, s = 0; + int i; + int __attribute__((aligned(16))) s = 0; const vector unsigned char zero = (const vector unsigned char) vec_splat_u8(0); - vector unsigned char perm1 = vec_lvsl(0, pix2); - vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1)); vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); vector signed int sumdiffs; + vector unsigned char perm1, perm2, pix2v, pix2iv; + GET_PERM(perm1, perm2, pix2); for (i = 0; i < h; i++) { /* Read unaligned pixels into our vectors. The vectors are as follows: * pix1v: pix1[0] - pix1[15] * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */ vector unsigned char pix1v = vec_ld(0, pix1); - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(16, pix2); - vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1); - vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2); + LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2); /* Calculate the average vector. */ vector unsigned char avgv = vec_avg(pix2v, pix2iv); @@ -114,68 +93,18 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, return s; } -#endif /* HAVE_VSX */ -#if HAVE_VSX -static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int i, s = 0; - const vector unsigned char zero = - (const vector unsigned char) vec_splat_u8(0); - vector unsigned char perm = vec_lvsl(0, pix2); - vector unsigned char pix1v, pix3v, avgv, t5; - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); - vector signed int sumdiffs; - uint8_t *pix3 = pix2 + line_size; - - /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one - * iteration becomes pix2 in the next iteration. We can use this - * fact to avoid a potentially expensive unaligned read, each - * time around the loop. - * Read unaligned pixels into our vectors. The vectors are as follows: - * pix2v: pix2[0] - pix2[15] - * Split the pixel vectors into shorts. */ - vector unsigned char pix2v = vec_vsx_ld(0, pix2); - - for (i = 0; i < h; i++) { - /* Read unaligned pixels into our vectors. The vectors are as follows: - * pix1v: pix1[0] - pix1[15] - * pix3v: pix3[0] - pix3[15] */ - pix1v = vec_vsx_ld(0, pix1); - pix3v = vec_vsx_ld(0, pix3); - - /* Calculate the average vector. */ - avgv = vec_avg(pix2v, pix3v); - - /* Calculate a sum of abs differences vector. */ - t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv)); - - /* Add each 4 pixel group together and put 4 results into sad. */ - sad = vec_sum4s(t5, sad); - - pix1 += line_size; - pix2v = pix3v; - pix3 += line_size; - } - - /* Sum up the four partial sums, and put the result into s. */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); - sumdiffs = vec_splat(sumdiffs, 3); - vec_vsx_st(sumdiffs, 0, &s); - return s; -} -#else static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { - int i, s = 0; + int i; + int __attribute__((aligned(16))) s = 0; const vector unsigned char zero = (const vector unsigned char) vec_splat_u8(0); - vector unsigned char perm = vec_lvsl(0, pix2); vector unsigned char pix1v, pix3v, avgv, t5; vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); vector signed int sumdiffs; + uint8_t *pix3 = pix2 + line_size; /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one @@ -185,19 +114,14 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, * Read unaligned pixels into our vectors. The vectors are as follows: * pix2v: pix2[0] - pix2[15] * Split the pixel vectors into shorts. */ - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(15, pix2); - vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm); + vector unsigned char pix2v = VEC_LD(0, pix2); for (i = 0; i < h; i++) { /* Read unaligned pixels into our vectors. The vectors are as follows: * pix1v: pix1[0] - pix1[15] * pix3v: pix3[0] - pix3[15] */ pix1v = vec_ld(0, pix1); - - pix2l = vec_ld(0, pix3); - pix2r = vec_ld(15, pix3); - pix3v = vec_perm(pix2l, pix2r, perm); + pix3v = VEC_LD(0, pix3); /* Calculate the average vector. */ avgv = vec_avg(pix2v, pix3v); @@ -219,113 +143,25 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, vec_ste(sumdiffs, 0, &s); return s; } -#endif /* HAVE_VSX */ -#if HAVE_VSX -static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int i, s = 0; - uint8_t *pix3 = pix2 + line_size; - const vector unsigned char zero = - (const vector unsigned char) vec_splat_u8(0); - const vector unsigned short two = - (const vector unsigned short) vec_splat_u16(2); - vector unsigned char avgv, t5; - vector unsigned char pix1v, pix3v, pix3iv; - vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; - vector unsigned short avghv, avglv; - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); - vector signed int sumdiffs; - - /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one - * iteration becomes pix2 in the next iteration. We can use this - * fact to avoid a potentially expensive unaligned read, as well - * as some splitting, and vector addition each time around the loop. - * Read unaligned pixels into our vectors. The vectors are as follows: - * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] - * Split the pixel vectors into shorts. */ - vector unsigned char pix2v = vec_vsx_ld(0, pix2); - vector unsigned char pix2iv = vec_vsx_ld(1, pix2); - vector unsigned short pix2hv = - (vector unsigned short) vec_mergeh(pix2v, zero); - vector unsigned short pix2lv = - (vector unsigned short) vec_mergel(pix2v, zero); - vector unsigned short pix2ihv = - (vector unsigned short) vec_mergeh(pix2iv, zero); - vector unsigned short pix2ilv = - (vector unsigned short) vec_mergel(pix2iv, zero); - vector unsigned short t1 = vec_add(pix2hv, pix2ihv); - vector unsigned short t2 = vec_add(pix2lv, pix2ilv); - - vector unsigned short t3, t4; - - for (i = 0; i < h; i++) { - /* Read unaligned pixels into our vectors. The vectors are as follows: - * pix1v: pix1[0] - pix1[15] - * pix3v: pix3[0] - pix3[15] pix3iv: pix3[1] - pix3[16] */ - pix1v = vec_vsx_ld(0, pix1); - pix3v = vec_vsx_ld(0, pix3); - pix3iv = vec_vsx_ld(1, pix3); - - /* Note that AltiVec does have vec_avg, but this works on vector pairs - * and rounds up. We could do avg(avg(a, b), avg(c, d)), but the - * rounding would mean that, for example, avg(3, 0, 0, 1) = 2, when - * it should be 1. Instead, we have to split the pixel vectors into - * vectors of shorts and do the averaging by hand. */ - - /* Split the pixel vectors into shorts. */ - pix3hv = (vector unsigned short) vec_mergeh(pix3v, zero); - pix3lv = (vector unsigned short) vec_mergel(pix3v, zero); - pix3ihv = (vector unsigned short) vec_mergeh(pix3iv, zero); - pix3ilv = (vector unsigned short) vec_mergel(pix3iv, zero); - - /* Do the averaging on them. */ - t3 = vec_add(pix3hv, pix3ihv); - t4 = vec_add(pix3lv, pix3ilv); - - avghv = vec_sr(vec_add(vec_add(t1, t3), two), two); - avglv = vec_sr(vec_add(vec_add(t2, t4), two), two); - - /* Pack the shorts back into a result. */ - avgv = vec_pack(avghv, avglv); - - /* Calculate a sum of abs differences vector. */ - t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv)); - - /* Add each 4 pixel group together and put 4 results into sad. */ - sad = vec_sum4s(t5, sad); - - pix1 += line_size; - pix3 += line_size; - /* Transfer the calculated values for pix3 into pix2. */ - t1 = t3; - t2 = t4; - } - /* Sum up the four partial sums, and put the result into s. */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); - sumdiffs = vec_splat(sumdiffs, 3); - vec_vsx_st(sumdiffs, 0, &s); - return s; -} -#else static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { - int i, s = 0; + int i; + int __attribute__((aligned(16))) s = 0; uint8_t *pix3 = pix2 + line_size; const vector unsigned char zero = (const vector unsigned char) vec_splat_u8(0); const vector unsigned short two = (const vector unsigned short) vec_splat_u16(2); vector unsigned char avgv, t5; - vector unsigned char perm1 = vec_lvsl(0, pix2); - vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1)); vector unsigned char pix1v, pix3v, pix3iv; vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; vector unsigned short avghv, avglv; vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); vector signed int sumdiffs; + vector unsigned char perm1, perm2, pix2v, pix2iv; + GET_PERM(perm1, perm2, pix2); /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one * iteration becomes pix2 in the next iteration. We can use this @@ -334,19 +170,16 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, * Read unaligned pixels into our vectors. The vectors are as follows: * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] * Split the pixel vectors into shorts. */ - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(16, pix2); - vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1); - vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2); - + LOAD_PIX(pix2v, pix2iv, pix2, perm1, perm2); vector unsigned short pix2hv = - (vector unsigned short) vec_mergeh(zero, pix2v); + (vector unsigned short) VEC_MERGEH(zero, pix2v); vector unsigned short pix2lv = - (vector unsigned short) vec_mergel(zero, pix2v); + (vector unsigned short) VEC_MERGEL(zero, pix2v); vector unsigned short pix2ihv = - (vector unsigned short) vec_mergeh(zero, pix2iv); + (vector unsigned short) VEC_MERGEH(zero, pix2iv); vector unsigned short pix2ilv = - (vector unsigned short) vec_mergel(zero, pix2iv); + (vector unsigned short) VEC_MERGEL(zero, pix2iv); + vector unsigned short t1 = vec_add(pix2hv, pix2ihv); vector unsigned short t2 = vec_add(pix2lv, pix2ilv); vector unsigned short t3, t4; @@ -356,11 +189,7 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, * pix1v: pix1[0] - pix1[15] * pix3v: pix3[0] - pix3[15] pix3iv: pix3[1] - pix3[16] */ pix1v = vec_ld(0, pix1); - - pix2l = vec_ld(0, pix3); - pix2r = vec_ld(16, pix3); - pix3v = vec_perm(pix2l, pix2r, perm1); - pix3iv = vec_perm(pix2l, pix2r, perm2); + LOAD_PIX(pix3v, pix3iv, pix3, perm1, perm2); /* Note that AltiVec does have vec_avg, but this works on vector pairs * and rounds up. We could do avg(avg(a, b), avg(c, d)), but the @@ -369,10 +198,10 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, * vectors of shorts and do the averaging by hand. */ /* Split the pixel vectors into shorts. */ - pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v); - pix3lv = (vector unsigned short) vec_mergel(zero, pix3v); - pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv); - pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv); + pix3hv = (vector unsigned short) VEC_MERGEH(zero, pix3v); + pix3lv = (vector unsigned short) VEC_MERGEL(zero, pix3v); + pix3ihv = (vector unsigned short) VEC_MERGEH(zero, pix3iv); + pix3ilv = (vector unsigned short) VEC_MERGEL(zero, pix3iv); /* Do the averaging on them. */ t3 = vec_add(pix3hv, pix3ihv); @@ -403,58 +232,21 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, return s; } -#endif /* HAVE_VSX */ -#if HAVE_VSX static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { - int i, s; - const vector unsigned int zero = - (const vector unsigned int) vec_splat_u32(0); - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); - vector signed int sumdiffs; - - for (i = 0; i < h; i++) { - /* Read potentially unaligned pixels into t1 and t2. */ - vector unsigned char t1 =vec_vsx_ld(0, pix1); - vector unsigned char t2 = vec_vsx_ld(0, pix2); - - /* Calculate a sum of abs differences vector. */ - vector unsigned char t3 = vec_max(t1, t2); - vector unsigned char t4 = vec_min(t1, t2); - vector unsigned char t5 = vec_sub(t3, t4); - - /* Add each 4 pixel group together and put 4 results into sad. */ - sad = vec_sum4s(t5, sad); - - pix1 += line_size; - pix2 += line_size; - } - - /* Sum up the four partial sums, and put the result into s. */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); - sumdiffs = vec_splat(sumdiffs, 3); - vec_vsx_st(sumdiffs, 0, &s); - return s; -} -#else -static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int i, s; + int i; + int __attribute__((aligned(16))) s; const vector unsigned int zero = (const vector unsigned int) vec_splat_u32(0); - vector unsigned char perm = vec_lvsl(0, pix2); vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); vector signed int sumdiffs; for (i = 0; i < h; i++) { /* Read potentially unaligned pixels into t1 and t2. */ - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(15, pix2); - vector unsigned char t1 = vec_ld(0, pix1); - vector unsigned char t2 = vec_perm(pix2l, pix2r, perm); + vector unsigned char t1 =vec_ld(0, pix1); + vector unsigned char t2 = VEC_LD(0, pix2); /* Calculate a sum of abs differences vector. */ vector unsigned char t3 = vec_max(t1, t2); @@ -475,60 +267,17 @@ static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, return s; } -#endif /* HAVE_VSX */ -#if HAVE_VSX -static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int i, s; - const vector unsigned int zero = - (const vector unsigned int) vec_splat_u32(0); - const vector unsigned char permclear = - (vector unsigned char) - { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 }; - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); - vector signed int sumdiffs; - - for (i = 0; i < h; i++) { - /* Read potentially unaligned pixels into t1 and t2. - * Since we're reading 16 pixels, and actually only want 8, - * mask out the last 8 pixels. The 0s don't change the sum. */ - vector unsigned char pix1l = vec_vsx_ld(0, pix1); - vector unsigned char pix2l = vec_vsx_ld(0, pix2); - vector unsigned char t1 = vec_and(pix1l, permclear); - vector unsigned char t2 = vec_and(pix2l, permclear); - - /* Calculate a sum of abs differences vector. */ - vector unsigned char t3 = vec_max(t1, t2); - vector unsigned char t4 = vec_min(t1, t2); - vector unsigned char t5 = vec_sub(t3, t4); - - /* Add each 4 pixel group together and put 4 results into sad. */ - sad = vec_sum4s(t5, sad); - - pix1 += line_size; - pix2 += line_size; - } - - /* Sum up the four partial sums, and put the result into s. */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); - sumdiffs = vec_splat(sumdiffs, 3); - vec_vsx_st(sumdiffs, 0, &s); - return s; -} -#else static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { - int i, s; + int i; + int __attribute__((aligned(16))) s; const vector unsigned int zero = (const vector unsigned int) vec_splat_u32(0); const vector unsigned char permclear = (vector unsigned char) { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 }; - vector unsigned char perm1 = vec_lvsl(0, pix1); - vector unsigned char perm2 = vec_lvsl(0, pix2); vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); vector signed int sumdiffs; @@ -536,14 +285,10 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, /* Read potentially unaligned pixels into t1 and t2. * Since we're reading 16 pixels, and actually only want 8, * mask out the last 8 pixels. The 0s don't change the sum. */ - vector unsigned char pix1l = vec_ld(0, pix1); - vector unsigned char pix1r = vec_ld(7, pix1); - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(7, pix2); - vector unsigned char t1 = vec_and(vec_perm(pix1l, pix1r, perm1), - permclear); - vector unsigned char t2 = vec_and(vec_perm(pix2l, pix2r, perm2), - permclear); + vector unsigned char pix1l = VEC_LD(0, pix1); + vector unsigned char pix2l = VEC_LD(0, pix2); + vector unsigned char t1 = vec_and(pix1l, permclear); + vector unsigned char t2 = vec_and(pix2l, permclear); /* Calculate a sum of abs differences vector. */ vector unsigned char t3 = vec_max(t1, t2); @@ -564,7 +309,6 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, return s; } -#endif /* HAVE_VSX */ /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced. * It's the sad8_altivec code above w/ squaring added. */ @@ -620,59 +364,20 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, /* Sum of Squared Errors for a 16x16 block, AltiVec-enhanced. * It's the sad16_altivec code above w/ squaring added. */ -#if HAVE_VSX -static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - int line_size, int h) -{ - int i, s; - const vector unsigned int zero = - (const vector unsigned int) vec_splat_u32(0); - vector unsigned int sum = (vector unsigned int) vec_splat_u32(0); - vector signed int sumsqr; - - for (i = 0; i < h; i++) { - /* Read potentially unaligned pixels into t1 and t2. */ - vector unsigned char t1 = vec_vsx_ld(0, pix1); - vector unsigned char t2 = vec_vsx_ld(0, pix2); - - /* Since we want to use unsigned chars, we can take advantage - * * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */ - - /* Calculate abs differences vector. */ - vector unsigned char t3 = vec_max(t1, t2); - vector unsigned char t4 = vec_min(t1, t2); - vector unsigned char t5 = vec_sub(t3, t4); - - /* Square the values and add them to our sum. */ - sum = vec_msum(t5, t5, sum); - - pix1 += line_size; - pix2 += line_size; - } - - /* Sum up the four partial sums, and put the result into s. */ - sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); - sumsqr = vec_splat(sumsqr, 3); - vec_vsx_st(sumsqr, 0, &s); - return s; -} -#else static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { - int i, s; + int i; + int __attribute__((aligned(16))) s; const vector unsigned int zero = (const vector unsigned int) vec_splat_u32(0); - vector unsigned char perm = vec_lvsl(0, pix2); vector unsigned int sum = (vector unsigned int) vec_splat_u32(0); vector signed int sumsqr; for (i = 0; i < h; i++) { /* Read potentially unaligned pixels into t1 and t2. */ - vector unsigned char pix2l = vec_ld(0, pix2); - vector unsigned char pix2r = vec_ld(15, pix2); vector unsigned char t1 = vec_ld(0, pix1); - vector unsigned char t2 = vec_perm(pix2l, pix2r, perm); + vector unsigned char t2 = VEC_LD(0, pix2); /* Since we want to use unsigned chars, we can take advantage * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */ @@ -692,11 +397,10 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, /* Sum up the four partial sums, and put the result into s. */ sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero); sumsqr = vec_splat(sumsqr, 3); - vec_ste(sumsqr, 0, &s); + vec_ste(sumsqr, 0, &s); return s; } -#endif /* HAVE_VSX */ static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, uint8_t *src, int stride, int h) |