author     Martin Storsjö <martin@martin.st>   2016-12-17 00:55:41 +0200
committer  Martin Storsjö <martin@martin.st>   2017-03-11 13:14:47 +0200
commit     ac6cb8ae5b1c56c4a3fceb635c60d05e447c4365 (patch)
tree       8a5a2866732efca3d92783c8f64fdd0ee5398fa3
parent     16ef000799b227d0226b7a678d28c34ff1d09410 (diff)
download   ffmpeg-ac6cb8ae5b1c56c4a3fceb635c60d05e447c4365.tar.gz
aarch64: vp9mc: Simplify the extmla macro parameters

Fold the field lengths into the macro. This makes the macro
invocations much more readable, when the lines are shorter. This
also makes it easier to use only half the registers within the
macro.

This is cherrypicked from libav commit
5e0c2158fbc774f87d3ce4b7b950ba4d42c4a7b8.

Signed-off-by: Martin Storsjö <martin@martin.st>
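The change is easiest to see at a call site: the element widths (.8h for the
accumulators, .16b for the sources) are dropped from every invocation and
appended inside the macro instead, via the \reg\().8h / \reg\().16b
substitutions. One invocation from the second hunk, before and after:

        // before: every operand carries its element width
        extmla  v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, 1, \size
        // after: bare register names; the macro appends the widths itself
        extmla  v1,    v2,    v24,    v25,    v4,     v5,     v6,     v16,     v17,     v18,     1, \size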
-rw-r--r--  libavcodec/aarch64/vp9mc_neon.S  50
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
index 80d1d238d6..94039114bd 100644
--- a/libavcodec/aarch64/vp9mc_neon.S
+++ b/libavcodec/aarch64/vp9mc_neon.S
@@ -193,41 +193,41 @@ endfunc
// for size >= 16), and multiply-accumulate into dst1 and dst3 (or
// dst1-dst2 and dst3-dst4 for size >= 16)
.macro extmla dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, offset, size
- ext v20.16b, \src1, \src2, #(2*\offset)
- ext v22.16b, \src4, \src5, #(2*\offset)
+ ext v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)
+ ext v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
.if \size >= 16
- mla \dst1, v20.8h, v0.h[\offset]
- ext v21.16b, \src2, \src3, #(2*\offset)
- mla \dst3, v22.8h, v0.h[\offset]
- ext v23.16b, \src5, \src6, #(2*\offset)
- mla \dst2, v21.8h, v0.h[\offset]
- mla \dst4, v23.8h, v0.h[\offset]
+ mla \dst1\().8h, v20.8h, v0.h[\offset]
+ ext v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)
+ mla \dst3\().8h, v22.8h, v0.h[\offset]
+ ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
+ mla \dst2\().8h, v21.8h, v0.h[\offset]
+ mla \dst4\().8h, v23.8h, v0.h[\offset]
.else
- mla \dst1, v20.8h, v0.h[\offset]
- mla \dst3, v22.8h, v0.h[\offset]
+ mla \dst1\().8h, v20.8h, v0.h[\offset]
+ mla \dst3\().8h, v22.8h, v0.h[\offset]
.endif
.endm
// The same as above, but don't accumulate straight into the
// destination, but use a temp register and accumulate with saturation.
.macro extmulqadd dst1, dst2, dst3, dst4, src1, src2, src3, src4, src5, src6, offset, size
- ext v20.16b, \src1, \src2, #(2*\offset)
- ext v22.16b, \src4, \src5, #(2*\offset)
+ ext v20.16b, \src1\().16b, \src2\().16b, #(2*\offset)
+ ext v22.16b, \src4\().16b, \src5\().16b, #(2*\offset)
.if \size >= 16
mul v20.8h, v20.8h, v0.h[\offset]
- ext v21.16b, \src2, \src3, #(2*\offset)
+ ext v21.16b, \src2\().16b, \src3\().16b, #(2*\offset)
mul v22.8h, v22.8h, v0.h[\offset]
- ext v23.16b, \src5, \src6, #(2*\offset)
+ ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
mul v21.8h, v21.8h, v0.h[\offset]
mul v23.8h, v23.8h, v0.h[\offset]
.else
mul v20.8h, v20.8h, v0.h[\offset]
mul v22.8h, v22.8h, v0.h[\offset]
.endif
- sqadd \dst1, \dst1, v20.8h
- sqadd \dst3, \dst3, v22.8h
+ sqadd \dst1\().8h, \dst1\().8h, v20.8h
+ sqadd \dst3\().8h, \dst3\().8h, v22.8h
.if \size >= 16
- sqadd \dst2, \dst2, v21.8h
- sqadd \dst4, \dst4, v23.8h
+ sqadd \dst2\().8h, \dst2\().8h, v21.8h
+ sqadd \dst4\().8h, \dst4\().8h, v23.8h
.endif
.endm
@@ -291,13 +291,13 @@ function \type\()_8tap_\size\()h_\idx1\idx2
mul v2.8h, v5.8h, v0.h[0]
mul v25.8h, v17.8h, v0.h[0]
.endif
- extmla v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, 1, \size
- extmla v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, 2, \size
- extmla v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, \idx1, \size
- extmla v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, 5, \size
- extmla v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, 6, \size
- extmla v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, 7, \size
- extmulqadd v1.8h, v2.8h, v24.8h, v25.8h, v4.16b, v5.16b, v6.16b, v16.16b, v17.16b, v18.16b, \idx2, \size
+ extmla v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, 1, \size
+ extmla v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, 2, \size
+ extmla v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, \idx1, \size
+ extmla v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, 5, \size
+ extmla v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, 6, \size
+ extmla v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, 7, \size
+ extmulqadd v1, v2, v24, v25, v4, v5, v6, v16, v17, v18, \idx2, \size
// Round, shift and saturate
sqrshrun v1.8b, v1.8h, #7
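
For reference, the \() in \src1\().16b is the GNU assembler's empty argument
separator: it terminates the macro-parameter name so that an element-width
suffix can be appended to the substituted register name. A minimal standalone
sketch of the same technique (hypothetical macro, not part of this patch):

        // addw expands its bare register arguments to .8h operands
        .macro addw dst, src1, src2
                add     \dst\().8h, \src1\().8h, \src2\().8h
        .endm

        addw    v1, v2, v3      // expands to: add v1.8h, v2.8h, v3.8h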