aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil
diff options
context:
space:
mode:
authorShivraj Patil <shivraj.patil@imgtec.com>2015-06-29 20:57:15 +0530
committerMichael Niedermayer <michaelni@gmx.at>2015-07-07 14:35:15 +0200
commitd12f76ffbb1b68d3c8a2859b7a095080ba985fa2 (patch)
treeb770a2fb5efe144fbe65756ac8da582fedee3bd8 /libavutil
parent9c95734e1c0f1a086d1c71b65c29355ef6f7785d (diff)
downloadffmpeg-d12f76ffbb1b68d3c8a2859b7a095080ba985fa2.tar.gz
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions in the new files idctdsp_msa.c and simple_idct_msa.c. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/mips/generic_macros_msa.h37
1 files changed, 37 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index d6a2573403..6e5598056e 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -507,6 +507,14 @@
ST_SW(in0, (pdst)); \
ST_SW(in1, (pdst) + stride); \
}
+/* Description : Hand 8 input vectors to ST_SW2, two at a time, with stride
+   Arguments   : Inputs - in0 ... in7, pdst, stride
+   Details     : Expands to four ST_SW2 invocations. The pair
+                 (in0, in1) is passed with destination (pdst), the pair
+                 (in2, in3) with (pdst) + 2 * (stride), (in4, in5) with
+                 (pdst) + 4 * (stride) and (in6, in7) with
+                 (pdst) + 6 * (stride).
+                 'stride' is parenthesized on every use so that an
+                 expression argument (e.g. 'w + 1') is scaled as a whole.
+                 'pdst' and 'stride' are expanded several times, so the
+                 arguments must not have side effects.
+*/
+#define ST_SW8(in0, in1, in2, in3, in4, in5, in6, in7,  \
+               pdst, stride)                            \
+{                                                       \
+    ST_SW2(in0, in1, (pdst), (stride));                 \
+    ST_SW2(in2, in3, (pdst) + 2 * (stride), (stride));  \
+    ST_SW2(in4, in5, (pdst) + 4 * (stride), (stride));  \
+    ST_SW2(in6, in7, (pdst) + 6 * (stride), (stride));  \
+}
/* Description : Store as 2x4 byte block to destination memory from input vector
Arguments : Inputs - in, stidx, pdst, stride
@@ -2382,6 +2390,35 @@
out7 = in0 - in7; \
}
+/* Description : Butterfly of 16 input vectors
+   Arguments   : Inputs  - in0 ... in15
+                 Outputs - out0 ... out15
+   Details     : Butterfly operation: for k = 0 ... 7,
+                     out(k)      = in(k) + in(15 - k)
+                     out(15 - k) = in(k) - in(15 - k)
+                 The eight sums are assigned first, then the eight
+                 differences. Every input is read twice, so an output
+                 must not name a variable that is still needed as an
+                 input by a later statement, and the arguments must not
+                 have side effects.
+                 Inputs are parenthesized so that expression arguments
+                 keep their meaning, notably on the right of '-'.
+*/
+#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,                \
+                     in8, in9, in10, in11, in12, in13, in14, in15,          \
+                     out0, out1, out2, out3, out4, out5, out6, out7,        \
+                     out8, out9, out10, out11, out12, out13, out14, out15)  \
+{                                                                           \
+    out0 = (in0) + (in15);                                                  \
+    out1 = (in1) + (in14);                                                  \
+    out2 = (in2) + (in13);                                                  \
+    out3 = (in3) + (in12);                                                  \
+    out4 = (in4) + (in11);                                                  \
+    out5 = (in5) + (in10);                                                  \
+    out6 = (in6) + (in9);                                                   \
+    out7 = (in7) + (in8);                                                   \
+                                                                            \
+    out8  = (in7) - (in8);                                                  \
+    out9  = (in6) - (in9);                                                  \
+    out10 = (in5) - (in10);                                                 \
+    out11 = (in4) - (in11);                                                 \
+    out12 = (in3) - (in12);                                                 \
+    out13 = (in2) - (in13);                                                 \
+    out14 = (in1) - (in14);                                                 \
+    out15 = (in0) - (in15);                                                 \
+}
+
/* Description : Transposes input 4x4 byte block
Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block)
Outputs - out0, out1, out2, out3 (output 4x4 byte block)