diff options
author | Rong Yan <rongyan236@gmail.com> | 2014-12-02 07:37:11 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-12-02 17:13:47 +0100 |
commit | b4d41beebe869210df4b37fb4b872b1518ae06ef (patch) | |
tree | c34bff80259a3585493e8846839df95191aca1b1 /libavcodec/ppc | |
parent | d7716961a856e0d6fcd23d82a5cb87c3a8439e4b (diff) | |
download | ffmpeg-b4d41beebe869210df4b37fb4b872b1518ae06ef.tar.gz |
avcodec/ppc/lossless_audiodsp_altivec: POWER LE support for scalarproduct_and_madd_int16_altivec()
adds macro GET_T()
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/ppc')
-rw-r--r-- | libavcodec/ppc/lossless_audiodsp_altivec.c | 31 |
1 files changed, 21 insertions, 10 deletions
diff --git a/libavcodec/ppc/lossless_audiodsp_altivec.c b/libavcodec/ppc/lossless_audiodsp_altivec.c
index 1ebb0f4aa3..bdec25223d 100644
--- a/libavcodec/ppc/lossless_audiodsp_altivec.c
+++ b/libavcodec/ppc/lossless_audiodsp_altivec.c
@@ -29,6 +29,20 @@
 #include "libavutil/ppc/types_altivec.h"
 #include "libavcodec/lossless_audiodsp.h"
 
+#if HAVE_BIGENDIAN
+#define GET_T(tt0,tt1,src,a,b){       \
+    a = vec_ld(16, src);              \
+    tt0 = vec_perm(b, a, align);      \
+    b = vec_ld(32, src);              \
+    tt1 = vec_perm(a, b, align);      \
+ }
+#else
+#define GET_T(tt0,tt1,src,a,b){       \
+    tt0 = vec_vsx_ld(0, src);         \
+    tt1 = vec_vsx_ld(16, src);        \
+ }
+#endif
+
 #if HAVE_ALTIVEC
 static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
                                                     const int16_t *v2,
@@ -38,26 +52,23 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
     LOAD_ZERO;
     vec_s16 *pv1 = (vec_s16 *) v1;
     register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
-    register vec_s16 t0, t1, i0, i1, i4;
-    register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
+    register vec_s16 t0, t1, i0, i1, i4, i2, i3;
     register vec_s32 res = zero_s32v;
+#if HAVE_BIGENDIAN
     register vec_u8 align = vec_lvsl(0, v2);
+    i2 = vec_ld(0, v2);
+    i3 = vec_ld(0, v3);
+#endif
     int32_t ires;
 
     order >>= 4;
     do {
-        i1 = vec_ld(16, v2);
-        t0 = vec_perm(i2, i1, align);
-        i2 = vec_ld(32, v2);
-        t1 = vec_perm(i1, i2, align);
+        GET_T(t0,t1,v2,i1,i2);
         i0 = pv1[0];
         i1 = pv1[1];
         res = vec_msum(t0, i0, res);
         res = vec_msum(t1, i1, res);
-        i4 = vec_ld(16, v3);
-        t0 = vec_perm(i3, i4, align);
-        i3 = vec_ld(32, v3);
-        t1 = vec_perm(i4, i3, align);
+        GET_T(t0,t1,v3,i4,i3);
         pv1[0] = vec_mladd(t0, muls, i0);
         pv1[1] = vec_mladd(t1, muls, i1);
         pv1 += 2;