diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-02-06 15:33:54 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-06 15:33:54 +0100 |
commit | c73445a45cdb6cc8c29f411fbf314500e48cf3b5 (patch) | |
tree | 8f1d6af7d37e4ab8d783bf008799a7e26bbe2d4b | |
parent | 9c978f243a47a0906fb32d723fcdd37d7b8cee93 (diff) | |
parent | 49ec5515956405a240b0f2a092d927104874b16a (diff) | |
download | ffmpeg-c73445a45cdb6cc8c29f411fbf314500e48cf3b5.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
vp8: Use 2 registers for dst_stride and src_stride in neon bilin filter
Conflicts:
libavcodec/arm/vp8dsp_neon.S
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/arm/vp8dsp_neon.S | 117 |
1 files changed, 54 insertions, 63 deletions
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 436b340330..5319346951 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -1576,12 +1576,11 @@ endconst
 /* Bilinear MC */
 
 function ff_put_vp8_bilin16_h_neon, export=1
-        push {lr}
-        ldr lr, [sp, #8] @ mx
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #4] @ mx
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
 1:
         subs r12, r12, #2
         vld1.8 {d2-d4}, [r2], r3
@@ -1604,16 +1603,15 @@ function ff_put_vp8_bilin16_h_neon, export=1
         vst1.8 {q3}, [r0,:128], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin16_v_neon, export=1
-        push {lr}
-        ldr lr, [sp, #12] @ my
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #8] @ my
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
         vld1.8 {q1}, [r2], r3
 1:
         subs r12, r12, #2
@@ -1635,20 +1633,19 @@ function ff_put_vp8_bilin16_v_neon, export=1
         vst1.8 {q3}, [r0,:128], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin16_hv_neon, export=1
-        push {lr}
-        ldr lr, [sp, #8] @ mx
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #4] @ mx
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr lr, [sp, #12] @ my
-        rsb r12, lr, #8
-        vdup.8 d2, lr
+        ldr r12, [sp, #8] @ my
+        vdup.8 d2, r12
+        rsb r12, r12, #8
         vdup.8 d3, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
 
         vld1.8 {d4-d6}, [r2], r3
         vext.8 q3, q2, q3, #1
@@ -1692,16 +1689,15 @@ function ff_put_vp8_bilin16_hv_neon, export=1
         vst1.8 {q10}, [r0,:128], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin8_h_neon, export=1
-        push {lr}
-        ldr lr, [sp, #8] @ mx
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #4] @ mx
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
 1:
         subs r12, r12, #2
         vld1.8 {q1}, [r2], r3
@@ -1718,16 +1714,15 @@ function ff_put_vp8_bilin8_h_neon, export=1
         vst1.8 {d16}, [r0,:64], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin8_v_neon, export=1
-        push {lr}
-        ldr lr, [sp, #12] @ my
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #8] @ my
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
         vld1.8 {d2}, [r2], r3
 1:
         subs r12, r12, #2
@@ -1743,20 +1738,19 @@ function ff_put_vp8_bilin8_v_neon, export=1
         vst1.8 {d6}, [r0,:64], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin8_hv_neon, export=1
-        push {lr}
-        ldr lr, [sp, #8] @ mx
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #4] @ mx
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr lr, [sp, #12] @ my
-        rsb r12, lr, #8
-        vdup.8 d2, lr
+        ldr r12, [sp, #8] @ my
+        vdup.8 d2, r12
+        rsb r12, r12, #8
         vdup.8 d3, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
 
         vld1.8 {q2}, [r2], r3
         vext.8 d5, d4, d5, #1
@@ -1785,16 +1779,15 @@ function ff_put_vp8_bilin8_hv_neon, export=1
         vst1.8 {d23}, [r0,:64], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin4_h_neon, export=1
-        push {lr}
-        ldr lr, [sp, #8] @ mx
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #4] @ mx
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
 1:
         subs r12, r12, #2
         vld1.8 {d2}, [r2], r3
@@ -1809,16 +1802,15 @@ function ff_put_vp8_bilin4_h_neon, export=1
         vst1.32 {d4[1]}, [r0,:32], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin4_v_neon, export=1
-        push {lr}
-        ldr lr, [sp, #12] @ my
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #8] @ my
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
         vld1.32 {d2[]}, [r2], r3
 1:
         vld1.32 {d3[]}, [r2]
@@ -1833,20 +1825,19 @@ function ff_put_vp8_bilin4_v_neon, export=1
         subs r12, r12, #2
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc
 
 function ff_put_vp8_bilin4_hv_neon, export=1
-        push {lr}
-        ldr lr, [sp, #8] @ mx
-        rsb r12, lr, #8
-        vdup.8 d0, lr
+        ldr r12, [sp, #4] @ mx
+        vdup.8 d0, r12
+        rsb r12, r12, #8
         vdup.8 d1, r12
-        ldr lr, [sp, #12] @ my
-        rsb r12, lr, #8
-        vdup.8 d2, lr
+        ldr r12, [sp, #8] @ my
+        vdup.8 d2, r12
+        rsb r12, r12, #8
         vdup.8 d3, r12
-        ldr r12, [sp, #4] @ h
+        ldr r12, [sp] @ h
 
         vld1.8 {d4}, [r2], r3
         vext.8 d5, d4, d4, #1
@@ -1872,5 +1863,5 @@ function ff_put_vp8_bilin4_hv_neon, export=1
         vst1.32 {d20[1]}, [r0,:32], r1
         bgt 1b
 
-        pop {pc}
+        bx lr
 endfunc