diff options
author | Romain Dolbeau <dolbeau@irisa.fr> | 2003-01-19 19:00:45 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2003-01-19 19:00:45 +0000 |
commit | 35e5fb067e0099fde6c3429131f11433eb92f22b (patch) | |
tree | 50480aa9498173e6b2f12f977d6e948b8f90fa53 /libavcodec/ppc/dsputil_altivec.c | |
parent | 8c55915b14897fd9bdaae9241f2dd54c00a869ef (diff) | |
download | ffmpeg-35e5fb067e0099fde6c3429131f11433eb92f22b.tar.gz |
PPC perf, PPC clear_block, AltiVec put_pixels8_xy2 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
Originally committed as revision 1476 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/ppc/dsputil_altivec.c')
-rw-r--r-- | libavcodec/ppc/dsputil_altivec.c | 254 |
1 files changed, 198 insertions, 56 deletions
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index e7f34ee16c..fcae7c1180 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -24,22 +24,6 @@ #include <sys/sysctl.h> #endif -#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT -unsigned long long perfdata[altivec_perf_total][altivec_data_total]; -/* list below must match enum in dsputil_altivec.h */ -static unsigned char* perfname[] = { - "fft_calc", - "gmc1", - "dct_unquantize_h263", - "idct_add", - "idct_put", - "put_pixels_clamped", - "put_pixels16", - "avg_pixels16" -}; -#include <stdio.h> -#endif - int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { int i; @@ -648,12 +632,12 @@ extern UINT8 cropTbl[]; void put_pixels_clamped_altivec(const DCTELEM *block, UINT8 *restrict pixels, int line_size) { -ALTIVEC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_DECLARE(altivec_put_pixels_clamped_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); /* read the pixels */ for(i=0;i<8;i++) { @@ -670,7 +654,7 @@ ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); block += 8; } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ register const vector short vczero = (const vector short)(0); @@ -681,7 +665,7 @@ ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); pixelsv0, pixelsv1, pixelsv2, pixelsv3, pixelsv4, pixelsv0old, pixelsv4old; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); blockv0 = vec_ld(0, block); blockv1 = vec_ld(16, block); @@ -720,17 +704,17 @@ ALTIVEC_TBL_START_COUNT(altivec_put_pixels_clamped_num, 1); vec_st(pixelsv3, 48, pixels); } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels_clamped_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { -ALTIVEC_TBL_DECLARE(altivec_put_pixels16_num, 1); +POWERPC_TBL_DECLARE(altivec_put_pixels16_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE int i; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { *((uint32_t*)(block )) = (((const struct unaligned_32 *) (pixels))->l); @@ -741,25 +725,24 @@ ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1); block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ - - register vector unsigned char perm = vec_lvsl(0, pixels); register vector unsigned char pixelsv1, pixelsv2; int i; -ALTIVEC_TBL_START_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_put_pixels16_num, 1); for(i=0; i<h; i++) { pixelsv1 = vec_ld(0, (unsigned char*)pixels); pixelsv2 = vec_ld(16, (unsigned char*)pixels); - vec_st(vec_perm(pixelsv1, pixelsv2, perm), 0, (unsigned char*)block); + vec_st(vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)), + 0, (unsigned char*)block); pixels+=line_size; block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } @@ -767,11 +750,11 @@ ALTIVEC_TBL_STOP_COUNT(altivec_put_pixels16_num, 1); #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { -ALTIVEC_TBL_DECLARE(altivec_avg_pixels16_num, 1); +POWERPC_TBL_DECLARE(altivec_avg_pixels16_num, 1); #ifdef ALTIVEC_USE_REFERENCE_C_CODE int i; -ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); for(i=0; i<h; i++) { op_avg(*((uint32_t*)(block)),(((const struct unaligned_32 *)(pixels))->l)); @@ -782,32 +765,210 @@ ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); #else /* ALTIVEC_USE_REFERENCE_C_CODE */ - - register vector unsigned char perm = vec_lvsl(0, pixels); register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; int i; -ALTIVEC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_START_COUNT(altivec_avg_pixels16_num, 1); for(i=0; i<h; i++) { pixelsv1 = vec_ld(0, (unsigned char*)pixels); pixelsv2 = vec_ld(16, (unsigned char*)pixels); blockv = vec_ld(0, block); - pixelsv = vec_perm(pixelsv1, pixelsv2, perm); + pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); blockv = vec_avg(blockv,pixelsv); vec_st(blockv, 0, (unsigned char*)block); pixels+=line_size; block +=line_size; } -ALTIVEC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels16_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } +void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, + int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_avg_pixels8_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int i; +POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); + for (i = 0; i < h; i++) { + *((uint32_t *) (block)) = + (((*((uint32_t *) (block))) | + ((((const struct unaligned_32 *) (pixels))->l))) - + ((((*((uint32_t *) (block))) ^ + ((((const struct unaligned_32 *) (pixels))-> + l))) & 0xFEFEFEFEUL) >> 1)); + *((uint32_t *) (block + 4)) = + (((*((uint32_t *) (block + 4))) | + ((((const struct unaligned_32 *) (pixels + 4))->l))) - + ((((*((uint32_t *) (block + 4))) ^ + ((((const struct unaligned_32 *) (pixels + + 4))-> + l))) & 0xFEFEFEFEUL) >> 1)); + pixels += line_size; + block += line_size; + } +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv; + int i; + +POWERPC_TBL_START_COUNT(altivec_avg_pixels8_num, 1); + + for (i = 0; i < h; i++) { + /* + block is 8 bytes-aligned, so we're either in the + left block (16 bytes-aligned) or in the right block (not) + */ + int rightside = ((unsigned long)block & 0x0000000F); + + blockv = vec_ld(0, block); + pixelsv1 = vec_ld(0, (unsigned char*)pixels); + pixelsv2 = vec_ld(16, (unsigned char*)pixels); + pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels)); + + if (rightside) + { + pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1)); + } + else + { + pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3)); + } + + blockv = vec_avg(blockv, pixelsv); + + vec_st(blockv, 0, block); + + pixels += line_size; + block += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_avg_pixels8_num, 1); + +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + +void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ +POWERPC_TBL_DECLARE(altivec_put_pixels8_xy2_num, 1); +#ifdef ALTIVEC_USE_REFERENCE_C_CODE + int j; +POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); + for (j = 0; j < 2; j++) { + int i; + const uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + const uint32_t b = + (((const struct unaligned_32 *) (pixels + 1))->l); + uint32_t l0 = + (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + uint32_t h0 = + ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + uint32_t l1, h1; + pixels += line_size; + for (i = 0; i < h; i += 2) { + uint32_t a = (((const struct unaligned_32 *) (pixels))->l); + uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l); + l1 = (a & 0x03030303UL) + (b & 0x03030303UL); + h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + a = (((const struct unaligned_32 *) (pixels))->l); + b = (((const struct unaligned_32 *) (pixels + 1))->l); + l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL; + h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2); + *((uint32_t *) block) = + h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL); + pixels += line_size; + block += line_size; + } pixels += 4 - line_size * (h + 1); + block += 4 - line_size * h; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); + +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + register int i; + register vector unsigned char + pixelsv1, pixelsv2, + pixelsavg; + register vector unsigned char + blockv, temp1, temp2; + register vector unsigned short + pixelssum1, pixelssum2, temp3; + register const vector unsigned char vczero = (const vector unsigned char)(0); + register const vector unsigned short vctwo = (const vector unsigned short)(2); + + temp1 = vec_ld(0, pixels); + temp2 = vec_ld(16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); + if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels)); + } + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum1 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + pixelssum1 = vec_add(pixelssum1, vctwo); + +POWERPC_TBL_START_COUNT(altivec_put_pixels8_xy2_num, 1); + for (i = 0; i < h ; i++) { + int rightside = ((unsigned long)block & 0x0000000F); + blockv = vec_ld(0, block); + + temp1 = vec_ld(line_size, pixels); + temp2 = vec_ld(line_size + 16, pixels); + pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels)); + if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) + { + pixelsv2 = temp2; + } + else + { + pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels)); + } + + pixelsv1 = vec_mergeh(vczero, pixelsv1); + pixelsv2 = vec_mergeh(vczero, pixelsv2); + pixelssum2 = vec_add((vector unsigned short)pixelsv1, + (vector unsigned short)pixelsv2); + temp3 = vec_add(pixelssum1, pixelssum2); + temp3 = vec_sra(temp3, vctwo); + pixelssum1 = vec_add(pixelssum2, vctwo); + pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero); + + if (rightside) + { + blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1)); + } + else + { + blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3)); + } + + vec_st(blockv, 0, block); + + block += line_size; + pixels += line_size; + } + +POWERPC_TBL_STOP_COUNT(altivec_put_pixels8_xy2_num, 1); +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ +} + int has_altivec(void) { #if CONFIG_DARWIN @@ -822,22 +983,3 @@ int has_altivec(void) #endif return 0; } - -#ifdef ALTIVEC_TBL_PERFORMANCE_REPORT -void altivec_display_perf_report(void) -{ - int i; - fprintf(stderr, "AltiVec performance report\n Values are from the Time Base register, and represent 4 bus cycles.\n"); - for(i = 0 ; i < altivec_perf_total ; i++) - { - if (perfdata[i][altivec_data_num] != (unsigned long long)0) - fprintf(stderr, " Function \"%s\":\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n", - perfname[i], - perfdata[i][altivec_data_min], - perfdata[i][altivec_data_max], - (double)perfdata[i][altivec_data_sum] / - (double)perfdata[i][altivec_data_num], - perfdata[i][altivec_data_num]); - } -} -#endif /* ALTIVEC_TBL_PERFORMANCE_REPORT */ |