aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/ppc/h264dsp.c
diff options
context:
space:
mode:
author: Rong Yan <rongyan236@gmail.com> 2015-05-14 06:43:44 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2015-05-14 11:08:07 +0200
commit: a2cd07d22a5e10b39f65f2cfcbab921244d32152 (patch)
tree: 1208440109f82acd902a12e82bf76d31978c2043 /libavcodec/ppc/h264dsp.c
parent: 8252f63d1b982fb8adeb3ac3a79406e3cb422650 (diff)
download: ffmpeg-a2cd07d22a5e10b39f65f2cfcbab921244d32152.tar.gz
avcodec/ppc/h264dsp: POWER LE support in h264_idct_dc_add_internal() fix vec_lvsl bug
We got defective video when using GCC 4.9.2 instead of GCC 4.9.1 to compile FFmpeg. We further found that GCC 4.8 and 4.9 need a patch to fix the lvsl/lvsr bug on POWER LE, and that GCC 5.1 has contained the correct code since its release. The message on gcc-patches requesting approval for the lvsl/lvsr patch is at https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00228.html. The fixed code avoids using lvsl and does not depend on the GCC version; it also uses fewer instructions on POWER LE. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/ppc/h264dsp.c')
-rw-r--r-- libavcodec/ppc/h264dsp.c 26
1 files changed, 15 insertions, 11 deletions
diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index da118a49b6..3822c7f55a 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -256,6 +256,11 @@ static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride)
ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}
+#if HAVE_BIGENDIAN
+#define DST_LD vec_ld
+#else
+#define DST_LD vec_vsx_ld
+#endif
static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size)
{
vec_s16 dc16;
@@ -275,18 +280,17 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *bl
dcplus = vec_packsu(dc16, zero_s16v);
dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
+#if HAVE_BIGENDIAN
aligner = vec_lvsr(0, dst);
-#if !HAVE_BIGENDIAN
- aligner = vec_perm(aligner, zero_u8v, vcswapc());
-#endif
dcplus = vec_perm(dcplus, dcplus, aligner);
dcminus = vec_perm(dcminus, dcminus, aligner);
+#endif
for (i = 0; i < size; i += 4) {
- v0 = vec_ld(0, dst+0*stride);
- v1 = vec_ld(0, dst+1*stride);
- v2 = vec_ld(0, dst+2*stride);
- v3 = vec_ld(0, dst+3*stride);
+ v0 = DST_LD(0, dst+0*stride);
+ v1 = DST_LD(0, dst+1*stride);
+ v2 = DST_LD(0, dst+2*stride);
+ v3 = DST_LD(0, dst+3*stride);
v0 = vec_adds(v0, dcplus);
v1 = vec_adds(v1, dcplus);
@@ -298,10 +302,10 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *bl
v2 = vec_subs(v2, dcminus);
v3 = vec_subs(v3, dcminus);
- vec_st(v0, 0, dst+0*stride);
- vec_st(v1, 0, dst+1*stride);
- vec_st(v2, 0, dst+2*stride);
- vec_st(v3, 0, dst+3*stride);
+ VEC_ST(v0, 0, dst+0*stride);
+ VEC_ST(v1, 0, dst+1*stride);
+ VEC_ST(v2, 0, dst+2*stride);
+ VEC_ST(v3, 0, dst+3*stride);
dst += 4*stride;
}