author     ZhouXiaoyong <zhouxiaoyong@loongson.cn>    2015-04-16 14:42:44 +0800
committer  Michael Niedermayer <michaelni@gmx.at>     2015-04-27 02:25:12 +0200
commit     0ace686ae8543750165d423adfe9249f3ce4c235 (patch)
tree       356a866bea9a5b07832dbdfa071d19e3fedc3fd5
parent     4b8a8194964415022667099b502e60989f4d3134 (diff)
avcodec: optimize mathops for Loongson-3 v1
HAVE_LOONGSON is replaced by HAVE_LOONGSON3. Although Loongson-2E and 2F also
support the Loongson SIMD instructions, their decoding performance is low. We
plan to focus on optimizing the Loongson-3A1000, 3B1500 and 3A1500 first, and
will modify configure later to support the Loongson-2 series by adding
HAVE_LOONGSON2.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
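
For context, the helpers the new inline asm overrides have simple portable
semantics. The sketch below is illustrative only (the _ref names are ours, not
FFmpeg's generic code): MULH/UMULH return the high 32 bits of a 64-bit
product, which the MIPS code derives via dmult/dmultu, mflo and a 32-bit
logical right shift, and mid_pred returns the median of three ints, which the
asm computes branch-free with sgt/movn/movz conditional moves.

#include <stdint.h>

/* Illustrative portable references (a sketch, not FFmpeg's exact fallbacks). */

/* High 32 bits of the signed 64-bit product a * b. */
static inline int mulh_ref(int a, int b)
{
    return (int)(((int64_t)a * b) >> 32);
}

/* High 32 bits of the unsigned 64-bit product a * b. */
static inline unsigned umulh_ref(unsigned a, unsigned b)
{
    return (unsigned)(((uint64_t)a * b) >> 32);
}

/* Median of three values. */
static inline int mid_pred_ref(int a, int b, int c)
{
    if (a > b) { int t = a; a = b; b = t; } /* ensure a <= b       */
    if (b > c) b = c;                       /* b = min(b, c)       */
    return a > b ? a : b;                   /* median is max(a, b) */
}

Note how each override defines a macro naming itself (#define MULH MULH,
#define ff_sqrt ff_sqrt) so the generic header can detect it; the patch adds
the matching #ifndef ff_sqrt guard around the generic ff_sqrt to make that
pattern work.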
-rw-r--r--   libavcodec/mathops.h        3
-rw-r--r--   libavcodec/mips/mathops.h   78
2 files changed, 73 insertions(+), 8 deletions(-)
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 87d110b457..46283ca444 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -211,6 +211,8 @@ if ((y) < (x)) {\
 #   define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
 #endif /* FASTDIV */
 
+#ifndef ff_sqrt
+#define ff_sqrt ff_sqrt
 static inline av_const unsigned int ff_sqrt(unsigned int a)
 {
     unsigned int b;
@@ -230,6 +232,7 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
 
     return b - (a < b * b);
 }
+#endif
 
 static inline int8_t ff_u8_to_s8(uint8_t a)
 {
diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
index 5673fc01ba..cdc7705bad 100644
--- a/libavcodec/mips/mathops.h
+++ b/libavcodec/mips/mathops.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
  *
  * This file is part of FFmpeg.
  *
@@ -27,14 +28,73 @@
 
 #if HAVE_INLINE_ASM
 
-#if HAVE_LOONGSON
+#if HAVE_LOONGSON3
+
+#define MULH MULH
+static inline av_const int MULH(int a, int b)
+{
+    int c;
+    __asm__ ("dmult %1, %2     \n\t"
+             "mflo  %0         \n\t"
+             "dsrl  %0, %0, 32 \n\t"
+             : "=r"(c)
+             : "r"(a),"r"(b)
+             : "hi", "lo");
+    return c;
+}
+
+#define UMULH UMULH
+static inline av_const unsigned UMULH(unsigned a, unsigned b)
+{
+    unsigned c;
+    __asm__ ("dmultu %1, %2     \n\t"
+             "mflo   %0         \n\t"
+             "dsrl   %0, %0, 32 \n\t"
+             : "=r"(c)
+             : "r"(a),"r"(b)
+             : "hi", "lo");
+    return c;
+}
+
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+    int t = b;
+    __asm__ ("sgt  $8, %1, %2 \n\t"
+             "movn %0, %1, $8 \n\t"
+             "movn %1, %2, $8 \n\t"
+             "sgt  $8, %1, %3 \n\t"
+             "movz %1, %3, $8 \n\t"
+             "sgt  $8, %0, %1 \n\t"
+             "movn %0, %1, $8 \n\t"
+             : "+&r"(t),"+&r"(a)
+             : "r"(b),"r"(c)
+             : "$8");
+    return t;
+}
+
+#define ff_sqrt ff_sqrt
+static inline av_const unsigned int ff_sqrt(unsigned int a)
+{
+    unsigned int b;
+
+    __asm__ ("ctc1    %1, $f0  \n\t"
+             "sqrt.s  $f2, $f0 \n\t"
+             "cvt.w.s $f0, $f2 \n\t"
+             "cfc1    %0, $f0  \n\t"
+             : "=r"(b)
+             : "r"(a));
+    return b;
+}
 
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
 {
     int64_t m;
-    __asm__ ("dmult.g %1, %2, %3 \n\t"
-             "daddu   %0, %0, %1 \n\t"
-             : "+r"(d), "=&r"(m) : "r"(a), "r"(b));
+    __asm__ ("dmult %2, %3     \n\t"
+             "mflo  %1         \n\t"
+             "daddu %0, %0, %1 \n\t"
+             : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
+             : "hi", "lo");
     return d;
 }
 #define MAC64(d, a, b) ((d) = MAC64(d, a, b))
@@ -42,14 +102,16 @@ static inline av_const int64_t MAC64(int64_t d, int a, int b)
 static inline av_const int64_t MLS64(int64_t d, int a, int b)
 {
     int64_t m;
-    __asm__ ("dmult.g %1, %2, %3 \n\t"
-             "dsubu   %0, %0, %1 \n\t"
-             : "+r"(d), "=&r"(m) : "r"(a), "r"(b));
+    __asm__ ("dmult %2, %3     \n\t"
+             "mflo  %1         \n\t"
+             "dsubu %0, %0, %1 \n\t"
+             : "+r"(d), "=&r"(m) : "r"(a), "r"(b)
+             : "hi", "lo");
     return d;
 }
 #define MLS64(d, a, b) ((d) = MLS64(d, a, b))
 
-#endif
+#endif /* HAVE_LOONGSON3 */
 
 #endif /* HAVE_INLINE_ASM */
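
A natural way to validate such a change on real hardware is to cross-check the
asm sequence against plain 64-bit arithmetic. The harness below is a
hypothetical sketch, not part of the patch: the file name and build line are
ours, and it assumes a Loongson-3/MIPS64 host with GCC-style inline asm.

/* mulh_check.c -- hypothetical cross-check for the MULH sequence above.
 * Build natively on a Loongson-3 host, e.g.:
 *   gcc -O2 mulh_check.c -o mulh_check */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Same instruction sequence as the patched MULH. */
static int mulh_asm(int a, int b)
{
    int c;
    __asm__ ("dmult %1, %2     \n\t"
             "mflo  %0         \n\t"
             "dsrl  %0, %0, 32 \n\t"
             : "=r"(c)
             : "r"(a), "r"(b)
             : "hi", "lo");
    return c;
}

/* Portable reference: high 32 bits of the signed 64-bit product. */
static int mulh_ref(int a, int b)
{
    return (int)(((int64_t)a * b) >> 32);
}

int main(void)
{
    srand(1); /* fixed seed so runs are reproducible */
    for (int i = 0; i < 1000000; i++) {
        int a = rand() - rand(), b = rand() - rand(); /* mix signs */
        /* Round-trip through 32-bit memory before comparing: dsrl leaves
         * the upper half of the result register zeroed rather than
         * sign-extended, so normalize both values first. */
        volatile int32_t ra = mulh_asm(a, b);
        volatile int32_t rr = mulh_ref(a, b);
        if (ra != rr) {
            printf("mismatch: a=%d b=%d asm=%d ref=%d\n", a, b, (int)ra, (int)rr);
            return 1;
        }
    }
    puts("MULH matches the portable reference");
    return 0;
}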