diff options
author | gxw <guxiwei-hf@loongson.cn> | 2019-10-21 15:56:47 +0800 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2019-10-30 18:09:00 +0100 |
commit | 648b422e171d5eab18f6c6fd346e4050d717b936 (patch) | |
tree | 838a0d9c88253b2f5c436e49d6b750fb17cd175e /libavutil/mips | |
parent | af70c94c63cc0ccf6a8078e87c81d061b8765889 (diff) | |
download | ffmpeg-648b422e171d5eab18f6c6fd346e4050d717b936.tar.gz |
avcodec/mips: msa optimizations for vc1dsp
WMV3 decoding performance has improved from 3.66x to 5.23x, tested on 3A4000.
Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r-- | libavutil/mips/generic_macros_msa.h | 3 |
1 file changed, 3 insertions and 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c25509e483..267d4e6ca5 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -299,6 +299,7 @@
 #define LD_SB4(...) LD_V4(v16i8, __VA_ARGS__)
 #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
 #define LD_SH4(...) LD_V4(v8i16, __VA_ARGS__)
+#define LD_SW4(...) LD_V4(v4i32, __VA_ARGS__)
 
 #define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
 { \
@@ -337,6 +338,7 @@
 #define LD_SB8(...) LD_V8(v16i8, __VA_ARGS__)
 #define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__)
 #define LD_SH8(...) LD_V8(v8i16, __VA_ARGS__)
+#define LD_SW8(...) LD_V8(v4i32, __VA_ARGS__)
 
 #define LD_V16(RTYPE, psrc, stride, \
 out0, out1, out2, out3, out4, out5, out6, out7, \
@@ -1382,6 +1384,7 @@
 out4, out5, out6, out7); \
 }
 #define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
+#define ILVR_B8_SW(...) ILVR_B8(v4i32, __VA_ARGS__)
 
 /* Description : Interleave right half of halfword elements from vectors
 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7