diff options
author | Måns Rullgård <mans@mansr.com> | 2009-10-11 16:16:08 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2009-10-11 16:16:08 +0000 |
commit | 0115b3eadb16ff12ba9b10946c518c1471387093 (patch) | |
tree | ce81f641279391ee3a6b0ae37e3aaafa6e1f1352 | |
parent | e276d9e82d91ad3aed8d8aedab771e7509021cc2 (diff) | |
download | ffmpeg-0115b3eadb16ff12ba9b10946c518c1471387093.tar.gz |
ARM: align stack in NEON h264 mc functions
A certain rotten fruit operating system doesn't provide the 8-byte stack
alignment required by the standard ARM ABI, so align it manually.
Originally committed as revision 20208 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/arm/h264dsp_neon.S | 33 |
1 files changed, 18 insertions, 15 deletions
```diff
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 78f312d758..edfce3a168 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -1064,9 +1064,11 @@ put_h264_qpel8_mc01:
         .endfunc

 function ff_put_h264_qpel8_mc11_neon, export=1
-        push {r0, r1, r2, lr}
+        push {r0, r1, r11, lr}
 put_h264_qpel8_mc11:
         lowpass_const r3
+        mov r11, sp
+        bic sp, sp, #15
         sub sp, sp, #64
         mov r0, sp
         sub r1, r1, #2
@@ -1074,15 +1076,15 @@ put_h264_qpel8_mc11:
         mov ip, #8
         vpush {d8-d15}
         bl put_h264_qpel8_h_lowpass_neon
-        ldrd r0, [sp, #128]
+        ldrd r0, [r11]
         mov r3, r2
         add ip, sp, #64
         sub r1, r1, r2, lsl #1
         mov r2, #8
         bl put_h264_qpel8_v_lowpass_l2_neon
         vpop {d8-d15}
-        add sp, sp, #76
-        pop {pc}
+        add sp, r11, #8
+        pop {r11, pc}
         .endfunc

 function ff_put_h264_qpel8_mc21_neon, export=1
@@ -1112,7 +1114,7 @@ put_h264_qpel8_mc21:

 function ff_put_h264_qpel8_mc31_neon, export=1
         add r1, r1, #1
-        push {r0, r1, r2, lr}
+        push {r0, r1, r11, lr}
         sub r1, r1, #1
         b put_h264_qpel8_mc11
         .endfunc
@@ -1181,7 +1183,7 @@ function ff_put_h264_qpel8_mc03_neon, export=1
         .endfunc

 function ff_put_h264_qpel8_mc13_neon, export=1
-        push {r0, r1, r2, lr}
+        push {r0, r1, r11, lr}
         add r1, r1, r2
         b put_h264_qpel8_mc11
         .endfunc
@@ -1194,7 +1196,7 @@ function ff_put_h264_qpel8_mc23_neon, export=1

 function ff_put_h264_qpel8_mc33_neon, export=1
         add r1, r1, #1
-        push {r0, r1, r2, lr}
+        push {r0, r1, r11, lr}
         add r1, r1, r2
         sub r1, r1, #1
         b put_h264_qpel8_mc11
@@ -1235,25 +1237,26 @@ put_h264_qpel16_mc01:
         .endfunc

 function ff_put_h264_qpel16_mc11_neon, export=1
-        push {r0, r1, r4, lr}
+        push {r0, r1, r4, r11, lr}
 put_h264_qpel16_mc11:
         lowpass_const r3
+        mov r11, sp
+        bic sp, sp, #15
         sub sp, sp, #256
         mov r0, sp
         sub r1, r1, #2
         mov r3, #16
         vpush {d8-d15}
         bl put_h264_qpel16_h_lowpass_neon
-        add r0, sp, #256
-        ldrd r0, [r0, #64]
+        ldrd r0, [r11]
         mov r3, r2
         add ip, sp, #64
         sub r1, r1, r2, lsl #1
         mov r2, #16
         bl put_h264_qpel16_v_lowpass_l2_neon
         vpop {d8-d15}
-        add sp, sp, #(256+8)
-        pop {r4, pc}
+        add sp, r11, #8
+        pop {r4, r11, pc}
         .endfunc

 function ff_put_h264_qpel16_mc21_neon, export=1
@@ -1280,7 +1283,7 @@ put_h264_qpel16_mc21:

 function ff_put_h264_qpel16_mc31_neon, export=1
         add r1, r1, #1
-        push {r0, r1, r4, lr}
+        push {r0, r1, r4, r11, lr}
         sub r1, r1, #1
         b put_h264_qpel16_mc11
         .endfunc
@@ -1349,7 +1352,7 @@ function ff_put_h264_qpel16_mc03_neon, export=1
         .endfunc

 function ff_put_h264_qpel16_mc13_neon, export=1
-        push {r0, r1, r4, lr}
+        push {r0, r1, r4, r11, lr}
         add r1, r1, r2
         b put_h264_qpel16_mc11
         .endfunc
@@ -1362,7 +1365,7 @@ function ff_put_h264_qpel16_mc23_neon, export=1

 function ff_put_h264_qpel16_mc33_neon, export=1
         add r1, r1, #1
-        push {r0, r1, r4, lr}
+        push {r0, r1, r4, r11, lr}
         add r1, r1, r2
         sub r1, r1, #1
         b put_h264_qpel16_mc11
```