diff options
author | Kaustubh Raste <kaustubh.raste@imgtec.com> | 2017-10-24 12:41:30 +0530 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2017-10-25 21:50:37 +0200 |
commit | 736a48901fa0061f52d3f6679546d4d6b5fdb510 (patch) | |
tree | 3ba9586fbe518131df8f96cda79715bdd44a2f4a /libavutil/mips | |
parent | ce0a52e9e92950be9350b09fbdc0b0cfdfb862ec (diff) | |
download | ffmpeg-736a48901fa0061f52d3f6679546d4d6b5fdb510.tar.gz |
avcodec/mips: Improve hevc bi weighted hv mc msa functions
Use immediate unsigned saturation for clip to max saving one vector register.
Signed-off-by: Kaustubh Raste <kaustubh.raste@imgtec.com>
Reviewed-by: Manojkumar Bhosale <Manojkumar.Bhosale@imgtec.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r-- | libavutil/mips/generic_macros_msa.h | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c892529f05..6a46704663 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -1088,6 +1088,25 @@
     out_m;                                                \
 } )
 
+#define CLIP_SW_0_255_MAX_SATU(in)                    \
+( {                                                   \
+    v4i32 out_m;                                      \
+                                                      \
+    out_m = __msa_maxi_s_w((v4i32) in, 0);            \
+    out_m = (v4i32) __msa_sat_u_w((v4u32) out_m, 7);  \
+    out_m;                                            \
+} )
+#define CLIP_SW2_0_255_MAX_SATU(in0, in1)  \
+{                                          \
+    in0 = CLIP_SW_0_255_MAX_SATU(in0);     \
+    in1 = CLIP_SW_0_255_MAX_SATU(in1);     \
+}
+#define CLIP_SW4_0_255_MAX_SATU(in0, in1, in2, in3)  \
+{                                                    \
+    CLIP_SW2_0_255_MAX_SATU(in0, in1);               \
+    CLIP_SW2_0_255_MAX_SATU(in2, in3);               \
+}
+
 /* Description : Addition of 4 signed word elements
                  4 signed word elements of input vector are added together and
                  resulted integer sum is returned
@@ -2244,6 +2263,22 @@
     out3 = in6 - in7;                                                         \
 }
 
+/* Description : Sign extend byte elements from right half of the vector
+   Arguments   : Input  - in  (byte vector)
+                 Output - out (sign extended halfword vector)
+                 Return Type - signed halfword
+   Details     : Sign bit of byte elements from input vector 'in' is
+                 extracted and interleaved with same vector 'in' to generate
+                 8 halfword elements keeping sign intact
+*/
+#define UNPCK_R_SB_SH(in, out)                       \
+{                                                    \
+    v16i8 sign_m;                                    \
+                                                     \
+    sign_m = __msa_clti_s_b((v16i8) in, 0);          \
+    out = (v8i16) __msa_ilvr_b(sign_m, (v16i8) in);  \
+}
+
 /* Description : Sign extend halfword elements from right half of the vector
    Arguments   : Inputs  - in  (input halfword vector)
                  Outputs - out (sign extended word vectors)