diff options
author | gxw <guxiwei-hf@loongson.cn> | 2019-08-07 17:52:00 +0800 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2019-08-13 16:48:38 +0200 |
commit | a3e572d96fd1dd6291f6b28e173db858c08ff8d8 (patch) | |
tree | 85807f6ec1442cc362cf8946e67f564c92267e07 /libavcodec/mips/idctdsp_msa.c | |
parent | 8f92eb05e063e6c4d6e36521020620d4e6e1c21d (diff) | |
download | ffmpeg-a3e572d96fd1dd6291f6b28e173db858c08ff8d8.tar.gz |
avutil/mips: refine msa macros CLIP_*.
Details of the changes are as follows:
1. Remove the local variable 'out_m' in 'CLIP_SH' and store the result in
the source vector.
2. Refine the implementation of the macros 'CLIP_SH_0_255' and 'CLIP_SW_0_255'.
VP8 decoding performance has improved by about 1.1% (from 7.03x to 7.11x).
H264 decoding performance has improved by about 0.5% (from 4.35x to 4.37x).
Theora decoding performance has improved by about 0.7% (from 5.79x to 5.83x).
3. Remove the redundant macros 'CLIP_SH/Wn_0_255_MAX_SATU' and use 'CLIP_SH/Wn_0_255'
instead, because there is no difference in effect between these two sets of macros.
Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/idctdsp_msa.c')
-rw-r--r-- | libavcodec/mips/idctdsp_msa.c | 9 |
1 files changed, 3 insertions, 6 deletions
diff --git a/libavcodec/mips/idctdsp_msa.c b/libavcodec/mips/idctdsp_msa.c index b29e420556..b6b98dc7fc 100644 --- a/libavcodec/mips/idctdsp_msa.c +++ b/libavcodec/mips/idctdsp_msa.c @@ -28,8 +28,7 @@ static void put_pixels_clamped_msa(const int16_t *block, uint8_t *pixels, v8i16 in0, in1, in2, in3, in4, in5, in6, in7; LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7); - CLIP_SH4_0_255(in0, in1, in2, in3); - CLIP_SH4_0_255(in4, in5, in6, in7); + CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7); PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3); PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7); @@ -63,8 +62,7 @@ static void put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *pixels, in6 += 128; in7 += 128; - CLIP_SH4_0_255(in0, in1, in2, in3); - CLIP_SH4_0_255(in4, in5, in6, in7); + CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7); PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3); PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7); @@ -109,8 +107,7 @@ static void add_pixels_clamped_msa(const int16_t *block, uint8_t *pixels, in6 += (v8i16) pix6; in7 += (v8i16) pix7; - CLIP_SH4_0_255(in0, in1, in2, in3); - CLIP_SH4_0_255(in4, in5, in6, in7); + CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7); PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3); PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7); |