diff options
author | Luca Barbato <lu_zero@gentoo.org> | 2007-12-22 23:10:02 +0000 |
---|---|---|
committer | Luca Barbato <lu_zero@gentoo.org> | 2007-12-22 23:10:02 +0000 |
commit | 08571377e6b3fc09dc139391f3e96a0688d1a07d (patch) | |
tree | b4fcb96f8954b94d25a01af10751b1c07a69d3e1 | |
parent | 1e0f346896841598ee18f0145d86c2f352ee8ed8 (diff) | |
download | ffmpeg-08571377e6b3fc09dc139391f3e96a0688d1a07d.tar.gz |
Add special cases for C == 0 and B == 0; about 2% slower on Cell, but this should eventually address Issue299.
Originally committed as revision 11306 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/ppc/h264_template_altivec.c | 45 |
1 file changed, 34 insertions, 11 deletions
diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c index 6302381bde..ce89cca25a 100644 --- a/libavcodec/ppc/h264_template_altivec.c +++ b/libavcodec/ppc/h264_template_altivec.c @@ -52,11 +52,12 @@ src += stride; #define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \ - vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\ +\ + vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);\ + vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);\ \ psum = vec_mladd(vA, vsrc0ssH, v32ss);\ - psum = vec_mladd(vB, vsrc1ssH, psum);\ - psum = vec_mladd(vC, vsrc2ssH, psum);\ + psum = vec_mladd(vE, vsrc1ssH, psum);\ psum = vec_sr(psum, v6us);\ \ vdst = vec_ld(0, dst);\ @@ -67,9 +68,6 @@ \ vec_st(fsum, 0, dst);\ \ - vsrc0ssH = vsrc1ssH;\ - vsrc1ssH = vsrc2ssH;\ -\ dst += stride;\ src += stride; @@ -155,23 +153,48 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, } } } else { + const vec_s16_t vE = vec_add(vB, vC); + if (ABCD[2]) { // y == 0 B == 0 if (!loadSecond) {// -> !reallyBadAlign for (i = 0 ; i < h ; i++) { vsrcCuc = vec_ld(stride + 0, src); - vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); - + vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); CHROMA_MC8_ALTIVEC_CORE_SIMPLE + + vsrc0uc = vsrc1uc; } } else { vec_u8_t vsrcDuc; for (i = 0 ; i < h ; i++) { - vsrcCuc = vec_ld(stride + 0, src); - vsrcDuc = vec_ld(stride + 16, src); - vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + vsrcCuc = vec_ld(0, src); + vsrcDuc = vec_ld(15, src); + vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + CHROMA_MC8_ALTIVEC_CORE_SIMPLE + vsrc0uc = vsrc1uc; + } + } + } else { // x == 0 C == 0 + if (!loadSecond) {// -> !reallyBadAlign + for (i = 0 ; i < h ; i++) { + CHROMA_MC8_ALTIVEC_CORE_SIMPLE + + vsrcCuc = vec_ld(0, src); + vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); + vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); + } + } else { + vec_u8_t vsrcDuc; + for (i = 0 ; i < h ; i++) { CHROMA_MC8_ALTIVEC_CORE_SIMPLE + + 
vsrcCuc = vec_ld(0, src); + vsrcDuc = vec_ld(15, src); + vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); + vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); } } + } } POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); } |