about | summary | refs | log | tree | commit | diff | stats
path: root/libavutil/mips
diff options
context:
space:
mode:
author: gxw <guxiwei-hf@loongson.cn> 2019-10-21 15:56:47 +0800
committer: Michael Niedermayer <michael@niedermayer.cc> 2019-10-30 18:09:00 +0100
commit: 648b422e171d5eab18f6c6fd346e4050d717b936 (patch)
tree: 838a0d9c88253b2f5c436e49d6b750fb17cd175e /libavutil/mips
parent: af70c94c63cc0ccf6a8078e87c81d061b8765889 (diff)
download: ffmpeg-648b422e171d5eab18f6c6fd346e4050d717b936.tar.gz
avcodec/mips: msa optimizations for vc1dsp
The speed-up of WMV3 decoding has improved from 3.66x to 5.23x, as tested on 3A4000.
Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r-- libavutil/mips/generic_macros_msa.h 3
1 files changed, 3 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c25509e483..267d4e6ca5 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -299,6 +299,7 @@
#define LD_SB4(...) LD_V4(v16i8, __VA_ARGS__)
#define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
#define LD_SH4(...) LD_V4(v8i16, __VA_ARGS__)
+#define LD_SW4(...) LD_V4(v4i32, __VA_ARGS__)
#define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
{ \
@@ -337,6 +338,7 @@
#define LD_SB8(...) LD_V8(v16i8, __VA_ARGS__)
#define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__)
#define LD_SH8(...) LD_V8(v8i16, __VA_ARGS__)
+#define LD_SW8(...) LD_V8(v4i32, __VA_ARGS__)
#define LD_V16(RTYPE, psrc, stride, \
out0, out1, out2, out3, out4, out5, out6, out7, \
@@ -1382,6 +1384,7 @@
out4, out5, out6, out7); \
}
#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
+#define ILVR_B8_SW(...) ILVR_B8(v4i32, __VA_ARGS__)
/* Description : Interleave right half of halfword elements from vectors
Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7