diff options
author | Rong Yan <rongyan236@gmail.com> | 2014-10-10 08:29:58 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-10-10 14:24:22 +0200 |
commit | 0d71bd5a9493a9021d08b46fb0ffb985d44dc178 (patch) | |
tree | a394b7950a45dca5cdcb4cfbc17796e5392b8cb1 | |
parent | c1fa5d1bd4642f75160f7806e7a7756526a119a2 (diff) | |
download | ffmpeg-0d71bd5a9493a9021d08b46fb0ffb985d44dc178.tar.gz |
libavcodec/ppc/hpeldsp_altivec.c : fix ff_put_pixels16_altivec() for POWER LE
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/ppc/hpeldsp_altivec.c | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c
index 7c3b5a1d06..79c2af8ac3 100644
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c
@@ -36,6 +36,48 @@
 
 #if HAVE_ALTIVEC
 /* next one assumes that ((line_size % 16) == 0) */
+#if HAVE_VSX
+/**
+ * VSX variant: copy a 16-byte-wide block of h rows from pixels to block.
+ *
+ * Consecutive rows are line_size bytes apart in both buffers. All loads and
+ * stores use vec_vsx_ld()/vec_vsx_st(), which accept unaligned addresses.
+ * Four rows are copied per iteration, so h should be a multiple of 4;
+ * otherwise the final iteration copies rows beyond h.
+ */
+void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+{
+    register vector unsigned char pixelsv1;
+    register vector unsigned char pixelsv1B;
+    register vector unsigned char pixelsv1C;
+    register vector unsigned char pixelsv1D;
+
+    int i;
+    /* Multiply instead of shifting: "<<" on a negative stride is undefined. */
+    register ptrdiff_t line_size_2 = line_size * 2;
+    register ptrdiff_t line_size_3 = line_size + line_size_2;
+    register ptrdiff_t line_size_4 = line_size * 4;
+
+// hand-unrolling the loop by 4 gains about 15%
+// minimum execution time goes from 74 to 60 cycles
+// it's faster than -funroll-loops, but using
+// -funroll-loops w/ this is bad - 74 cycles again.
+// all this is on a 7450, tuning for the 7450
+    for (i = 0; i < h; i += 4) {
+        pixelsv1  = vec_vsx_ld( 0, pixels);
+        pixelsv1B = vec_vsx_ld(line_size, pixels);
+        pixelsv1C = vec_vsx_ld(line_size_2, pixels);
+        pixelsv1D = vec_vsx_ld(line_size_3, pixels);
+        vec_vsx_st(pixelsv1, 0, (unsigned char*)block);
+        vec_vsx_st(pixelsv1B, line_size, (unsigned char*)block);
+        vec_vsx_st(pixelsv1C, line_size_2, (unsigned char*)block);
+        /* Same store primitive as the rows above: vec_st() ignores the low
+         * four address bits and would silently assume an aligned block. */
+        vec_vsx_st(pixelsv1D, line_size_3, (unsigned char*)block);
+        pixels+=line_size_4;
+        block +=line_size_4;
+    }
+}
+#else
 void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     register vector unsigned char pixelsv1, pixelsv2;
@@ -76,6 +118,8 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t li
     }
 }
 
+#endif /* HAVE_VSX */
+
 /* next one assumes that ((line_size % 16) == 0) */
 #define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
 void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)