diff options
author | Kostya Shishkov <kostya.shishkov@gmail.com> | 2013-08-14 15:28:05 -0400 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2013-08-16 10:08:47 +0300 |
commit | f399e406af0c8507bb3ab7b94995ad7b8f409093 (patch) | |
tree | 47173770e8d266f51d3d6831d5ef4b0a380f2ddb /libavcodec/ppc | |
parent | 9d86bfc259ae9ba7a76067ec931ff20fbb86ea2a (diff) | |
download | ffmpeg-f399e406af0c8507bb3ab7b94995ad7b8f409093.tar.gz |
altivec: perform an explicit unaligned load
Implicit vector loads on POWER7 hardware can use the VSX
instruction set instead of classic Altivec/VMX. Let's force
a VMX load in this case.
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/ppc')
-rw-r--r-- | libavcodec/ppc/int_altivec.c | 4 |
1 file changed, 1 insertion, 3 deletions
```diff
diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/int_altivec.c
index 8357ca768b..38ec99b8c5 100644
--- a/libavcodec/ppc/int_altivec.c
+++ b/libavcodec/ppc/int_altivec.c
@@ -84,14 +84,12 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
 {
     int i;
     LOAD_ZERO;
-    const vec_s16 *pv;
     register vec_s16 vec1;
     register vec_s32 res = vec_splat_s32(0), t;
     int32_t ires;
 
     for(i = 0; i < order; i += 8){
-        pv = (const vec_s16*)v1;
-        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
+        vec1 = vec_unaligned_load(v1);
         t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
         res = vec_sums(t, res);
         v1 += 8;
```