avutil/mips: refine msa macros CLIP_*.

Changing details as following: 1. Remove the local variable 'out_m' in 'CLIP_SH' and store the result in source vector. 2. Refine the implementation of macro 'CLIP_SH_0_255' and 'CLIP_SW_0_255'. Performance of VP8 decoding has speed up about 1.1%(from 7.03x to 7.11x). Performance of H264 decoding has speed up about 0.5%(from 4.35x to 4.37x). Performance of Theora decoding has speed up about 0.7%(from 5.79x to 5.83x). 3. Remove redundant macro 'CLIP_SH/Wn_0_255_MAX_SATU' and use 'CLIP_SH/Wn_0_255' instead, because there are no difference in the effect of this two macros. Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
author: gxw <guxiwei-hf@loongson.cn> 2019-08-07 17:52:00 +0800
committer: Michael Niedermayer <michael@niedermayer.cc> 2019-08-13 16:48:38 +0200
commit: a3e572d96fd1dd6291f6b28e173db858c08ff8d8 (patch)
tree: 85807f6ec1442cc362cf8946e67f564c92267e07 /libavutil/mips
parent: 8f92eb05e063e6c4d6e36521020620d4e6e1c21d (diff)
download: ffmpeg-a3e572d96fd1dd6291f6b28e173db858c08ff8d8.tar.gz
1 files changed, 49 insertions, 70 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index 9ac0583765..681d87c458 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -933,99 +933,78 @@
 
 /* Description : Clips all halfword elements of input vector between min & max
                  out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in))
-   Arguments   : Inputs  - in       (input vector)
-                         - min      (min threshold)
-                         - max      (max threshold)
-                 Outputs - out_m    (output vector with clipped elements)
+   Arguments   : Inputs  - in    (input vector)
+                         - min   (min threshold)
+                         - max   (max threshold)
+                 Outputs - in    (output vector with clipped elements)
                  Return Type - signed halfword
 */
-#define CLIP_SH(in, min, max)                           \
-( {                                                     \
-    v8i16 out_m;                                        \
-                                                        \
-    out_m = __msa_max_s_h((v8i16) min, (v8i16) in);     \
-    out_m = __msa_min_s_h((v8i16) max, (v8i16) out_m);  \
-    out_m;                                              \
-} )
+#define CLIP_SH(in, min, max)                     \
+{                                                 \
+    in = __msa_max_s_h((v8i16) min, (v8i16) in);  \
+    in = __msa_min_s_h((v8i16) max, (v8i16) in);  \
+}
 
 /* Description : Clips all signed halfword elements of input vector
                  between 0 & 255
-   Arguments   : Inputs  - in       (input vector)
-                 Outputs - out_m    (output vector with clipped elements)
-                 Return Type - signed halfword
+   Arguments   : Inputs  - in    (input vector)
+                 Outputs - in    (output vector with clipped elements)
+                 Return Type - signed halfwords
 */
-#define CLIP_SH_0_255(in)                                 \
-( {                                                       \
-    v8i16 max_m = __msa_ldi_h(255);                       \
-    v8i16 out_m;                                          \
-                                                          \
-    out_m = __msa_maxi_s_h((v8i16) in, 0);                \
-    out_m = __msa_min_s_h((v8i16) max_m, (v8i16) out_m);  \
-    out_m;                                                \
-} )
+#define CLIP_SH_0_255(in)                       \
+{                                               \
+    in = __msa_maxi_s_h((v8i16) in, 0);         \
+    in = (v8i16) __msa_sat_u_h((v8u16) in, 7);  \
+}
+
 #define CLIP_SH2_0_255(in0, in1)  \
 {                                 \
-    in0 = CLIP_SH_0_255(in0);     \
-    in1 = CLIP_SH_0_255(in1);     \
+    CLIP_SH_0_255(in0);           \
+    CLIP_SH_0_255(in1);           \
 }
+
 #define CLIP_SH4_0_255(in0, in1, in2, in3)  \
 {                                           \
     CLIP_SH2_0_255(in0, in1);               \
     CLIP_SH2_0_255(in2, in3);               \
 }
 
-#define CLIP_SH_0_255_MAX_SATU(in)                    \
-( {                                                   \
-    v8i16 out_m;                                      \
-                                                      \
-    out_m = __msa_maxi_s_h((v8i16) in, 0);            \
-    out_m = (v8i16) __msa_sat_u_h((v8u16) out_m, 7);  \
-    out_m;                                            \
-} )
-#define CLIP_SH2_0_255_MAX_SATU(in0, in1)  \
-{                                          \
-    in0 = CLIP_SH_0_255_MAX_SATU(in0);     \
-    in1 = CLIP_SH_0_255_MAX_SATU(in1);     \
-}
-#define CLIP_SH4_0_255_MAX_SATU(in0, in1, in2, in3)  \
-{                                                    \
-    CLIP_SH2_0_255_MAX_SATU(in0, in1);               \
-    CLIP_SH2_0_255_MAX_SATU(in2, in3);               \
+#define CLIP_SH8_0_255(in0, in1, in2, in3,  \
+                       in4, in5, in6, in7)  \
+{                                           \
+    CLIP_SH4_0_255(in0, in1, in2, in3);     \
+    CLIP_SH4_0_255(in4, in5, in6, in7);     \
 }
 
 /* Description : Clips all signed word elements of input vector
                  between 0 & 255
-   Arguments   : Inputs  - in       (input vector)
-                 Outputs - out_m    (output vector with clipped elements)
+   Arguments   : Inputs  - in    (input vector)
+                 Outputs - in    (output vector with clipped elements)
                  Return Type - signed word
 */
-#define CLIP_SW_0_255(in)                                 \
-( {                                                       \
-    v4i32 max_m = __msa_ldi_w(255);                       \
-    v4i32 out_m;                                          \
-                                                          \
-    out_m = __msa_maxi_s_w((v4i32) in, 0);                \
-    out_m = __msa_min_s_w((v4i32) max_m, (v4i32) out_m);  \
-    out_m;                                                \
-} )
+#define CLIP_SW_0_255(in)                       \
+{                                               \
+    in = __msa_maxi_s_w((v4i32) in, 0);         \
+    in = (v4i32) __msa_sat_u_w((v4u32) in, 7);  \
+}
 
-#define CLIP_SW_0_255_MAX_SATU(in)                    \
-( {                                                   \
-    v4i32 out_m;                                      \
-                                                      \
-    out_m = __msa_maxi_s_w((v4i32) in, 0);            \
-    out_m = (v4i32) __msa_sat_u_w((v4u32) out_m, 7);  \
-    out_m;                                            \
-} )
-#define CLIP_SW2_0_255_MAX_SATU(in0, in1)  \
-{                                          \
-    in0 = CLIP_SW_0_255_MAX_SATU(in0);     \
-    in1 = CLIP_SW_0_255_MAX_SATU(in1);     \
+#define CLIP_SW2_0_255(in0, in1)  \
+{                                 \
+    CLIP_SW_0_255(in0);           \
+    CLIP_SW_0_255(in1);           \
 }
-#define CLIP_SW4_0_255_MAX_SATU(in0, in1, in2, in3)  \
-{                                                    \
-    CLIP_SW2_0_255_MAX_SATU(in0, in1);               \
-    CLIP_SW2_0_255_MAX_SATU(in2, in3);               \
+
+#define CLIP_SW4_0_255(in0, in1, in2, in3)  \
+{                                           \
+    CLIP_SW2_0_255(in0, in1);               \
+    CLIP_SW2_0_255(in2, in3);               \
+}
+
+#define CLIP_SW8_0_255(in0, in1, in2, in3,  \
+                       in4, in5, in6, in7)  \
+{                                           \
+    CLIP_SW4_0_255(in0, in1, in2, in3);     \
+    CLIP_SW4_0_255(in4, in5, in6, in7);     \
 }
 
 /* Description : Addition of 4 signed word elements
author	gxw <guxiwei-hf@loongson.cn>	2019-08-07 17:52:00 +0800
committer	Michael Niedermayer <michael@niedermayer.cc>	2019-08-13 16:48:38 +0200
commit	a3e572d96fd1dd6291f6b28e173db858c08ff8d8 (patch)
tree	85807f6ec1442cc362cf8946e67f564c92267e07 /libavutil/mips
parent	8f92eb05e063e6c4d6e36521020620d4e6e1c21d (diff)
download	ffmpeg-a3e572d96fd1dd6291f6b28e173db858c08ff8d8.tar.gz