avcodec/mips: msa optimizations for vc1dsp

The WMV3 decoding speedup improved from 3.66x to 5.23x, as tested on a 3A4000.

Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
author: gxw <guxiwei-hf@loongson.cn> 2019-10-21 15:56:47 +0800
committer: Michael Niedermayer <michael@niedermayer.cc> 2019-10-30 18:09:00 +0100
commit: 648b422e171d5eab18f6c6fd346e4050d717b936 (patch)
tree: 838a0d9c88253b2f5c436e49d6b750fb17cd175e /libavutil/mips
parent: af70c94c63cc0ccf6a8078e87c81d061b8765889 (diff)
download: ffmpeg-648b422e171d5eab18f6c6fd346e4050d717b936.tar.gz
1 files changed, 3 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c25509e483..267d4e6ca5 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -299,6 +299,7 @@
 #define LD_SB4(...) LD_V4(v16i8, __VA_ARGS__)
 #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
 #define LD_SH4(...) LD_V4(v8i16, __VA_ARGS__)
+#define LD_SW4(...) LD_V4(v4i32, __VA_ARGS__)
 
 #define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4)  \
 {                                                                 \
@@ -337,6 +338,7 @@
 #define LD_SB8(...) LD_V8(v16i8, __VA_ARGS__)
 #define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__)
 #define LD_SH8(...) LD_V8(v8i16, __VA_ARGS__)
+#define LD_SW8(...) LD_V8(v4i32, __VA_ARGS__)
 
 #define LD_V16(RTYPE, psrc, stride,                                   \
                out0, out1, out2, out3, out4, out5, out6, out7,        \
@@ -1382,6 +1384,7 @@
             out4, out5, out6, out7);                              \
 }
 #define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
+#define ILVR_B8_SW(...) ILVR_B8(v4i32, __VA_ARGS__)
 
 /* Description : Interleave right half of halfword elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
author	gxw <guxiwei-hf@loongson.cn>	2019-10-21 15:56:47 +0800
committer	Michael Niedermayer <michael@niedermayer.cc>	2019-10-30 18:09:00 +0100
commit	648b422e171d5eab18f6c6fd346e4050d717b936 (patch)
tree	838a0d9c88253b2f5c436e49d6b750fb17cd175e /libavutil/mips
parent	af70c94c63cc0ccf6a8078e87c81d061b8765889 (diff)
download	ffmpeg-648b422e171d5eab18f6c6fd346e4050d717b936.tar.gz