diff options
author | Martin Storsjö <martin@martin.st> | 2017-02-26 22:13:10 +0200 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2017-03-19 22:53:28 +0200 |
commit | 70317b25aa35c0907720e4d2b7686408588c07aa (patch) | |
tree | 3a6d1508b983e92636d41329391db13faa10d5ec | |
parent | b7a565fe71d16747209bd66955a54c9b54abc5dd (diff) | |
download | ffmpeg-70317b25aa35c0907720e4d2b7686408588c07aa.tar.gz |
arm/aarch64: vp9itxfm: Skip loading the min_eob pointer when it won't be used
In the half/quarter cases where we don't use the min_eob array, defer
loading the pointer until we know it will be needed.
This is cherry-picked from libav commit
3a0d5e206d24d41d87a25ba16a79b2ea04c39d4c.
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- | libavcodec/aarch64/vp9itxfm_neon.S | 3 | ||||
-rw-r--r-- | libavcodec/arm/vp9itxfm_neon.S | 4 |
2 files changed, 4 insertions, 3 deletions
diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S index 2c3c002d54..3e5da0880c 100644 --- a/libavcodec/aarch64/vp9itxfm_neon.S +++ b/libavcodec/aarch64/vp9itxfm_neon.S @@ -1483,7 +1483,6 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1 b.eq idct32x32_dc_add_neon movrel x10, idct_coeffs - movrel x12, min_eob_idct_idct_32, 2 mov x15, x30 @@ -1508,6 +1507,8 @@ function ff_vp9_idct_idct_32x32_add_neon, export=1 cmp w3, #135 b.le idct32x32_half_add_neon + movrel x12, min_eob_idct_idct_32, 2 + .irp i, 0, 8, 16, 24 add x0, sp, #(\i*64) .if \i > 0 diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index adc9896db4..6d4d765c28 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -889,8 +889,6 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1 push {r4-r8,lr} .ifnc \txfm1\()_\txfm2,idct_idct vpush {q4-q7} -.else - movrel r8, min_eob_idct_idct_16 + 2 .endif @ Align the stack, allocate a temp buffer @@ -914,6 +912,8 @@ A and r7, sp, #15 ble idct16x16_quarter_add_neon cmp r3, #38 ble idct16x16_half_add_neon + + movrel r8, min_eob_idct_idct_16 + 2 .endif .irp i, 0, 4, 8, 12 |