diff options
author | Shiyou Yin <yinshiyou-hf@loongson.cn> | 2019-07-17 17:35:00 +0800 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2019-07-19 01:23:23 +0200 |
commit | 153c60752558369b98dce0b7a0ca7acc687fa630 (patch) | |
tree | b1dc51fc656ac92b1ac3f377ee75c6808bec0369 /libavcodec/mips/h264idct_msa.c | |
parent | 00ed04d6149691a9abf486b2f88172fd6341d801 (diff) | |
download | ffmpeg-153c60752558369b98dce0b7a0ca7acc687fa630.tar.gz |
avutil/mips: refactor msa load and store macros.
Replace STnxm_UB and LDnxm_SH with new macros ST_{H/W/D}{1/2/4/8}.
The old macros are difficult to use because they don't follow the same parameter passing rules.
Details of the changes are as follows:
1. remove LD4x4_SH.
2. replace ST2x4_UB with ST_H4.
3. replace ST4x2_UB with ST_W2.
4. replace ST4x4_UB with ST_W4.
5. replace ST4x8_UB with ST_W8.
6. replace ST6x4_UB with ST_W2 and ST_H2.
7. replace ST8x1_UB with ST_D1.
8. replace ST8x2_UB with ST_D2.
9. replace ST8x4_UB with ST_D4.
10. replace ST8x8_UB with ST_D8.
11. replace ST12x4_UB with ST_D4 and ST_W4.
Example of a new macro: ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride)
ST_H4 stores four half-word elements of vector 'in' to pdst with stride.
About the macro name:
1) 'ST' means store operation.
2) 'H/W/D' means the type of vector element is 'half-word/word/double-word'.
3) Number '1/2/4/8' means how many elements will be stored.
About the macro parameter:
1) 'in0, in1...' 128-bit vectors.
2) 'idx0, idx1...' element indices.
3) 'pdst' destination pointer to store to.
4) 'stride' stride of each store operation.
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/h264idct_msa.c')
-rw-r--r-- | libavcodec/mips/h264idct_msa.c | 10 |
1 files changed, 3 insertions, 7 deletions
diff --git a/libavcodec/mips/h264idct_msa.c b/libavcodec/mips/h264idct_msa.c index 1e1a5c8cb8..7851bfdf4b 100644 --- a/libavcodec/mips/h264idct_msa.c +++ b/libavcodec/mips/h264idct_msa.c @@ -237,9 +237,7 @@ static void avc_idct8_addblk_msa(uint8_t *dst, int16_t *src, int32_t dst_stride) CLIP_SH4_0_255(res4, res5, res6, res7); PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6, dst0, dst1, dst2, dst3); - ST8x4_UB(dst0, dst1, dst, dst_stride); - dst += (4 * dst_stride); - ST8x4_UB(dst2, dst3, dst, dst_stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, dst_stride) } static void avc_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src, @@ -269,9 +267,7 @@ static void avc_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src, CLIP_SH4_0_255(dst4_r, dst5_r, dst6_r, dst7_r); PCKEV_B4_SB(dst1_r, dst0_r, dst3_r, dst2_r, dst5_r, dst4_r, dst7_r, dst6_r, dst0, dst1, dst2, dst3); - ST8x4_UB(dst0, dst1, dst, dst_stride); - dst += (4 * dst_stride); - ST8x4_UB(dst2, dst3, dst, dst_stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, dst_stride) } void ff_h264_idct_add_msa(uint8_t *dst, int16_t *src, int32_t dst_stride) @@ -340,7 +336,7 @@ void ff_h264_idct4x4_addblk_dc_msa(uint8_t *dst, int16_t *src, ADD2(pred_r, input_dc, pred_l, input_dc, pred_r, pred_l); CLIP_SH2_0_255(pred_r, pred_l); out = __msa_pckev_b((v16i8) pred_l, (v16i8) pred_r); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); + ST_W4(out, 0, 1, 2, 3, dst, dst_stride); } void ff_h264_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src, |