diff options
author | Shivraj Patil <shivraj.patil@imgtec.com> | 2015-06-29 20:57:15 +0530 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-07-07 14:35:15 +0200 |
commit | d12f76ffbb1b68d3c8a2859b7a095080ba985fa2 (patch) | |
tree | b770a2fb5efe144fbe65756ac8da582fedee3bd8 /libavutil | |
parent | 9c95734e1c0f1a086d1c71b65c29355ef6f7785d (diff) | |
download | ffmpeg-d12f76ffbb1b68d3c8a2859b7a095080ba985fa2.tar.gz |
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for idctdsp functions in the new files idctdsp_msa.c and simple_idct_msa.c.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/mips/generic_macros_msa.h | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index d6a2573403..6e5598056e 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -507,6 +507,14 @@
     ST_SW(in0, (pdst)); \
     ST_SW(in1, (pdst) + stride); \
 }
+#define ST_SW8(in0, in1, in2, in3, in4, in5, in6, in7, \
+               pdst, stride) \
+{ \
+    ST_SW2(in0, in1, (pdst), stride); \
+    ST_SW2(in2, in3, (pdst) + 2 * stride, stride); \
+    ST_SW2(in4, in5, (pdst) + 4 * stride, stride); \
+    ST_SW2(in6, in7, (pdst) + 6 * stride, stride); \
+}
 
 /* Description : Store as 2x4 byte block to destination memory from input vector
    Arguments : Inputs - in, stidx, pdst, stride
@@ -2382,6 +2390,35 @@
     out7 = in0 - in7; \
 }
 
+/* Description : Butterfly of 16 input vectors
+   Arguments : Inputs - in0 ... in15
+               Outputs - out0 .. out15
+   Details : Butterfly operation
+*/
+#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, \
+                     in8, in9, in10, in11, in12, in13, in14, in15, \
+                     out0, out1, out2, out3, out4, out5, out6, out7, \
+                     out8, out9, out10, out11, out12, out13, out14, out15) \
+{ \
+    out0 = in0 + in15; \
+    out1 = in1 + in14; \
+    out2 = in2 + in13; \
+    out3 = in3 + in12; \
+    out4 = in4 + in11; \
+    out5 = in5 + in10; \
+    out6 = in6 + in9; \
+    out7 = in7 + in8; \
+ \
+    out8 = in7 - in8; \
+    out9 = in6 - in9; \
+    out10 = in5 - in10; \
+    out11 = in4 - in11; \
+    out12 = in3 - in12; \
+    out13 = in2 - in13; \
+    out14 = in1 - in14; \
+    out15 = in0 - in15; \
+}
+
 /* Description : Transposes input 4x4 byte block
    Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block)
                Outputs - out0, out1, out2, out3 (output 4x4 byte block)