about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Luca Barbato <lu_zero@gentoo.org> 2007-12-22 23:10:02 +0000
committer: Luca Barbato <lu_zero@gentoo.org> 2007-12-22 23:10:02 +0000
commit: 08571377e6b3fc09dc139391f3e96a0688d1a07d (patch)
tree: b4fcb96f8954b94d25a01af10751b1c07a69d3e1
parent: 1e0f346896841598ee18f0145d86c2f352ee8ed8 (diff)
download: ffmpeg-08571377e6b3fc09dc139391f3e96a0688d1a07d.tar.gz
Add C/B == 0 cases, 2% slower on CELL but should address Issue299 eventually
Originally committed as revision 11306 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/ppc/h264_template_altivec.c 45
1 files changed, 34 insertions, 11 deletions
diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c
index 6302381bde..ce89cca25a 100644
--- a/libavcodec/ppc/h264_template_altivec.c
+++ b/libavcodec/ppc/h264_template_altivec.c
@@ -52,11 +52,12 @@
src += stride;
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
- vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\
+\
+ vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);\
+ vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);\
\
psum = vec_mladd(vA, vsrc0ssH, v32ss);\
- psum = vec_mladd(vB, vsrc1ssH, psum);\
- psum = vec_mladd(vC, vsrc2ssH, psum);\
+ psum = vec_mladd(vE, vsrc1ssH, psum);\
psum = vec_sr(psum, v6us);\
\
vdst = vec_ld(0, dst);\
@@ -67,9 +68,6 @@
\
vec_st(fsum, 0, dst);\
\
- vsrc0ssH = vsrc1ssH;\
- vsrc1ssH = vsrc2ssH;\
-\
dst += stride;\
src += stride;
@@ -155,23 +153,48 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
}
}
} else {
+ const vec_s16_t vE = vec_add(vB, vC);
+ if (ABCD[2]) { // y == 0 B == 0
if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
- vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
-
+ vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+
+ vsrc0uc = vsrc1uc;
}
} else {
vec_u8_t vsrcDuc;
for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(stride + 0, src);
- vsrcDuc = vec_ld(stride + 16, src);
- vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ vsrcCuc = vec_ld(0, src);
+ vsrcDuc = vec_ld(15, src);
+ vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+ vsrc0uc = vsrc1uc;
+ }
+ }
+ } else { // x == 0 C == 0
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+
+ vsrcCuc = vec_ld(0, src);
+ vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+ }
+ } else {
+ vec_u8_t vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+
+ vsrcCuc = vec_ld(0, src);
+ vsrcDuc = vec_ld(15, src);
+ vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
}
}
+ }
}
POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
}