diff options
author | Romain Dolbeau <dolbeau@irisa.fr> | 2003-01-20 22:50:14 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2003-01-20 22:50:14 +0000 |
commit | fe50f38577887ea8e1a66a95503a97438e2d768f (patch) | |
tree | e1be5191e5977504808a59d8a9e1de10675f6fbc | |
parent | 566986ee1913ee5567b7dce31229c23f22162120 (diff) | |
download | ffmpeg-fe50f38577887ea8e1a66a95503a97438e2d768f.tar.gz |
More AltiVec MC functions patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
Originally committed as revision 1485 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/ppc/dsputil_altivec.c | 451 | ||||
-rw-r--r-- | libavcodec/ppc/dsputil_altivec.h | 3 | ||||
-rw-r--r-- | libavcodec/ppc/dsputil_ppc.c | 28 | ||||
-rw-r--r-- | libavcodec/ppc/dsputil_ppc.h | 4 |
4 files changed, 389 insertions, 97 deletions
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index fcae7c1180..536d5f1920 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2002 Brian Foley * Copyright (c) 2002 Dieter Shirley + * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -628,86 +629,7 @@ void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } -extern UINT8 cropTbl[]; -void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels, - int line_size) -{ -POWERPC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1); -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - int i; - UINT8 *cm = cropTbl + MAX_NEG_CROP; - -POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); - - /* read the pixels */ - for(i=0;i<8;i++) { - pixels[0] = cm[block[0]]; - pixels[1] = cm[block[1]]; - pixels[2] = cm[block[2]]; - pixels[3] = cm[block[3]]; - pixels[4] = cm[block[4]]; - pixels[5] = cm[block[5]]; - pixels[6] = cm[block[6]]; - pixels[7] = cm[block[7]]; - - pixels += line_size; - block += 8; - } - -POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); - -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ - register const vector short vczero = (const vector short)(0); - register vector short - blockv0, blockv1, blockv2, blockv3, - blockv4, blockv5, blockv6, blockv7; - register vector unsigned char - pixelsv0, pixelsv1, pixelsv2, pixelsv3, pixelsv4, - pixelsv0old, pixelsv4old; - -POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); - - blockv0 = vec_ld(0, block); - blockv1 = vec_ld(16, block); - blockv2 = vec_ld(32, block); - blockv3 = vec_ld(48, block); - blockv4 = vec_ld(64, block); - blockv5 = vec_ld(80, block); - blockv6 = vec_ld(96, block); - blockv7 = vec_ld(112, block); - if (((unsigned long)pixels) & 0x0000000F) - { - pixelsv0old = vec_ld(-8, pixels); - pixelsv4old = vec_ld(56, pixels); - pixelsv0 = vec_packsu(vczero, blockv0); - pixelsv1 = vec_packsu(blockv1, blockv2); - pixelsv2 = vec_packsu(blockv3, blockv4); - pixelsv3 = vec_packsu(blockv5, blockv6); - pixelsv4 = vec_packsu(blockv5, vczero); - pixelsv0 = vec_perm(pixelsv0old, pixelsv0, vcprm(0, 1, s2, s3)); - pixelsv4 = vec_perm(pixelsv4, pixelsv4old, vcprm(0, 1, s2, s3)); - vec_st(pixelsv0, -8, pixels); - vec_st(pixelsv1, 8, pixels); - vec_st(pixelsv2, 24, pixels); - vec_st(pixelsv3, 40, pixels); - vec_st(pixelsv4, 56, pixels); - } - else - { - pixelsv0 = vec_packsu(blockv0, blockv1); - pixelsv1 = vec_packsu(blockv2, blockv3); - pixelsv2 = vec_packsu(blockv4, blockv5); - pixelsv3 = vec_packsu(blockv6, blockv7); - vec_st(pixelsv0, 0, pixels); - vec_st(pixelsv1, 16, pixels); - vec_st(pixelsv2, 32, pixels); - vec_st(pixelsv3, 48, pixels); - } - -POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ -} - +/* next one assumes that ((line_size % 16) == 0) */ void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); @@ -729,6 +651,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ register vector unsigned char pixelsv1, pixelsv2; + register vector unsigned char perm = vec_lvsl(0, pixels); int i; POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); @@ -736,7 +659,7 @@ POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { pixelsv1 = vec_ld(0, (unsigned char*)pixels); pixelsv2 = vec_ld(16, (unsigned char*)pixels); - vec_st(vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)), + vec_st(vec_perm(pixelsv1, pixelsv2, perm), 0, (unsigned char*)block); pixels+=line_size; block +=line_size; @@ -747,6 +670,7 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } +/* next one assumes that ((line_size % 16) == 0) */ #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { @@ -769,6 +693,7 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; + register vector unsigned char perm = vec_lvsl(0, pixels); int i; POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); @@ -777,7 +702,7 @@ POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); pixelsv1 = vec_ld(0, (unsigned char*)pixels); pixelsv2 = vec_ld(16, (unsigned char*)pixels); blockv = vec_ld(0, block); - pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); + pixelsv = vec_perm(pixelsv1, pixelsv2, perm); blockv = vec_avg(blockv,pixelsv); vec_st(blockv, 0, (unsigned char*)block); pixels+=line_size; @@ -789,8 +714,8 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } -void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, - int line_size, int h) +/* next one assumes that ((line_size % 8) == 0) */ +void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) { POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE @@ -855,6 +780,7 @@ POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } +/* next one assumes that ((line_size % 8) == 0) */ void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); @@ -969,6 +895,363 @@ POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } +/* next one assumes that ((line_size % 8) == 0) */ +void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); + for (j = 0; j < 2; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, + pixelsavg; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3; + register const vector unsigned char vczero = (const vector unsigned char)(0); + register const vector unsigned short vcone = (const vector unsigned short)(1); + register const vector unsigned short vctwo = (const vector unsigned short)(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vcone); + +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vcone); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + + if (rightside) + { + blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } + else + { + blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 16) == 0) */ +void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_pixels16_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); + for (j = 0; j < 4; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, pixelsv3, pixelsv4; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3, + pixelssum3, pixelssum4, temp4; + register const vector unsigned char vczero = (const vector unsigned char)(0); + register const vector unsigned short vctwo = (const vector unsigned short)(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum3 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum3 = vec_add(pixelssum3, vctwo); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); + +POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1); + for (i = 0; i < h ; i++) { + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + + pixelssum4 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp4 = vec_add(pixelssum3, pixelssum4); + temp4 = vec_sra(temp4, vctwo); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + + pixelssum3 = vec_add(pixelssum4, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + + blockv = vec_packsu(temp3, temp4); + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +/* next one assumes that ((line_size % 16) == 0) */ +void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); + for (j = 0; j < 4; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x01010101UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, pixelsv3, pixelsv4; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3, + pixelssum3, pixelssum4, temp4; + register const vector unsigned char vczero = (const vector unsigned char)(0); + register const vector unsigned short vcone = (const vector unsigned short)(1); + register const vector unsigned short vctwo = (const vector unsigned short)(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum3 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum3 = vec_add(pixelssum3, vcone); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vcone); + +POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); + for (i = 0; i < h ; i++) { + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv3 = vec_mergel(vczero, pixelsv1); + pixelsv4 = vec_mergel(vczero, pixelsv2); + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + + pixelssum4 = vec_add((vector unsigned short)pixelsv3, + (vector unsigned short)pixelsv4); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp4 = vec_add(pixelssum3, pixelssum4); + temp4 = vec_sra(temp4, vctwo); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + + pixelssum3 = vec_add(pixelssum4, vcone); + pixelssum1 = vec_add(pixelssum2, vcone); + + blockv = vec_packsu(temp3, temp4); + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + int has_altivec(void) { #if CONFIG_DARWIN diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 1f66017270..b19c0a5e51 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -44,6 +44,9 @@ extern void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line extern void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); extern void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); extern void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h); +extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); +extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h); extern void gmc1_altivec(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 7dcca85a7c..948b5e51a4 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -40,18 +40,20 @@ int mm_support(void) #ifdef POWERPC_TBL_PERFORMANCE_REPORT unsigned long long perfdata[powerpc_perf_total][powerpc_data_total]; -/* list below must match enum in dsputil_altivec.h */ +/* list below must match enum in dsputil_ppc.h */ static unsigned char* perfname[] = { "fft_calc_altivec", "gmc1_altivec", "dct_unquantize_h263_altivec", "idct_add_altivec", "idct_put_altivec", - "put_pixels_clamped_altivec", "put_pixels16_altivec", "avg_pixels16_altivec", "avg_pixels8_altivec", "put_pixels8_xy2_altivec", + "put_no_rnd_pixels8_xy2_altivec", + "put_pixels16_xy2_altivec", + "put_no_rnd_pixels16_xy2_altivec", "clear_blocks_dcbz32_ppc" }; #ifdef POWERPC_PERF_USE_PMC @@ -65,9 +67,9 @@ void powerpc_display_perf_report(void) { int i; #ifndef POWERPC_PERF_USE_PMC - fprintf(stderr, "AltiVec performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n"); + fprintf(stderr, "PowerPC performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n"); #else /* POWERPC_PERF_USE_PMC */ - fprintf(stderr, "AltiVec performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); + fprintf(stderr, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n"); #endif /* POWERPC_PERF_USE_PMC */ for(i = 0 ; i < powerpc_perf_total ; i++) { @@ -199,21 +201,23 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask) c->pix_sum = pix_sum_altivec; c->diff_pixels = diff_pixels_altivec; c->get_pixels = get_pixels_altivec; -// next two disabled as they're untested. +// next one disabled as it's untested. #if 0 c->add_bytes= add_bytes_altivec; - c->put_pixels_clamped = put_pixels_clamped_altivec; -#endif +#endif /* 0 */ c->put_pixels_tab[0][0] = put_pixels16_altivec; c->avg_pixels_tab[0][0] = avg_pixels16_altivec; // next one disabled as it's untested. #if 0 c->avg_pixels_tab[1][0] = avg_pixels8_altivec; -#endif +#endif /* 0 */ c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; + c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; c->gmc1 = gmc1_altivec; - + #ifdef POWERPC_TBL_PERFORMANCE_REPORT { int i; @@ -228,12 +232,12 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask) perfdata_miss[i][powerpc_data_max] = 0x0000000000000000; perfdata_miss[i][powerpc_data_sum] = 0x0000000000000000; perfdata_miss[i][powerpc_data_num] = 0x0000000000000000; -#endif +#endif /* POWERPC_PERF_USE_PMC */ } } -#endif +#endif /* POWERPC_TBL_PERFORMANCE_REPORT */ } else -#endif +#endif /* HAVE_ALTIVEC */ { // Non-AltiVec PPC optimisations diff --git a/libavcodec/ppc/dsputil_ppc.h b/libavcodec/ppc/dsputil_ppc.h index e0611ff886..6a96c1d289 100644 --- a/libavcodec/ppc/dsputil_ppc.h +++ b/libavcodec/ppc/dsputil_ppc.h @@ -29,11 +29,13 @@ enum powerpc_perf_index { altivec_dct_unquantize_h263_num, altivec_idct_add_num, altivec_idct_put_num, - altivec_put_pixels_clamped_num, altivec_put_pixels16_num, altivec_avg_pixels16_num, altivec_avg_pixels8_num, altivec_put_pixels8_xy2_num, + altivec_put_no_rnd_pixels8_xy2_num, + altivec_put_pixels16_xy2_num, + altivec_put_no_rnd_pixels16_xy2_num, powerpc_clear_blocks_dcbz32, powerpc_perf_total }; |