ARM: align stack in NEON h264 mc functions

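A certain rotten fruit operating system doesn't provide the 8-byte stack alignment required by the standard ARM ABI, so align it manually. The qpel8 and qpel16 mc11 bodies (also reached from the mc31, mc13 and mc33 entry points) now save the incoming sp in r11, round sp down to a 16-byte boundary with `bic sp, sp, #15` before reserving the scratch buffer, reload the saved r0/r1 through r11 instead of an sp-relative offset, and restore sp from r11 on exit. Every entry point that branches into those bodies pushes r11 so it can be restored on return.

Originally committed as revision 20208 to svn://svn.ffmpeg.org/ffmpeg/trunk.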
author: Måns Rullgård <mans@mansr.com> 2009-10-11 16:16:08 +0000
committer: Måns Rullgård <mans@mansr.com> 2009-10-11 16:16:08 +0000
commit: 0115b3eadb16ff12ba9b10946c518c1471387093 (patch)
tree: ce81f641279391ee3a6b0ae37e3aaafa6e1f1352
parent: e276d9e82d91ad3aed8d8aedab771e7509021cc2 (diff)
download: ffmpeg-0115b3eadb16ff12ba9b10946c518c1471387093.tar.gz
1 file changed, 18 insertions, 15 deletions
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 78f312d758..edfce3a168 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -1064,9 +1064,11 @@ put_h264_qpel8_mc01:
         .endfunc
 
 function ff_put_h264_qpel8_mc11_neon, export=1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
 put_h264_qpel8_mc11:
         lowpass_const   r3
+        mov             r11, sp
+        bic             sp,  sp,  #15
         sub             sp,  sp,  #64
         mov             r0,  sp
         sub             r1,  r1,  #2
@@ -1074,15 +1076,15 @@ put_h264_qpel8_mc11:
         mov             ip,  #8
         vpush           {d8-d15}
         bl              put_h264_qpel8_h_lowpass_neon
-        ldrd            r0,  [sp, #128]
+        ldrd            r0,  [r11]
         mov             r3,  r2
         add             ip,  sp,  #64
         sub             r1,  r1,  r2, lsl #1
         mov             r2,  #8
         bl              put_h264_qpel8_v_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  sp,  #76
-        pop             {pc}
+        add             sp,  r11, #8
+        pop             {r11, pc}
         .endfunc
 
 function ff_put_h264_qpel8_mc21_neon, export=1
@@ -1112,7 +1114,7 @@ put_h264_qpel8_mc21:
 
 function ff_put_h264_qpel8_mc31_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
         sub             r1,  r1,  #1
         b               put_h264_qpel8_mc11
         .endfunc
@@ -1181,7 +1183,7 @@ function ff_put_h264_qpel8_mc03_neon, export=1
         .endfunc
 
 function ff_put_h264_qpel8_mc13_neon, export=1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
         add             r1,  r1,  r2
         b               put_h264_qpel8_mc11
         .endfunc
@@ -1194,7 +1196,7 @@ function ff_put_h264_qpel8_mc23_neon, export=1
 
 function ff_put_h264_qpel8_mc33_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
         add             r1,  r1,  r2
         sub             r1,  r1,  #1
         b               put_h264_qpel8_mc11
@@ -1235,25 +1237,26 @@ put_h264_qpel16_mc01:
         .endfunc
 
 function ff_put_h264_qpel16_mc11_neon, export=1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
 put_h264_qpel16_mc11:
         lowpass_const   r3
+        mov             r11, sp
+        bic             sp,  sp,  #15
         sub             sp,  sp,  #256
         mov             r0,  sp
         sub             r1,  r1,  #2
         mov             r3,  #16
         vpush           {d8-d15}
         bl              put_h264_qpel16_h_lowpass_neon
-        add             r0,  sp,  #256
-        ldrd            r0,  [r0, #64]
+        ldrd            r0,  [r11]
         mov             r3,  r2
         add             ip,  sp,  #64
         sub             r1,  r1,  r2, lsl #1
         mov             r2,  #16
         bl              put_h264_qpel16_v_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  sp,  #(256+8)
-        pop             {r4, pc}
+        add             sp,  r11, #8
+        pop             {r4, r11, pc}
         .endfunc
 
 function ff_put_h264_qpel16_mc21_neon, export=1
@@ -1280,7 +1283,7 @@ put_h264_qpel16_mc21:
 
 function ff_put_h264_qpel16_mc31_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
         sub             r1,  r1,  #1
         b               put_h264_qpel16_mc11
         .endfunc
@@ -1349,7 +1352,7 @@ function ff_put_h264_qpel16_mc03_neon, export=1
         .endfunc
 
 function ff_put_h264_qpel16_mc13_neon, export=1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
         add             r1,  r1,  r2
         b               put_h264_qpel16_mc11
         .endfunc
@@ -1362,7 +1365,7 @@ function ff_put_h264_qpel16_mc23_neon, export=1
 
 function ff_put_h264_qpel16_mc33_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
         add             r1,  r1,  r2
         sub             r1,  r1,  #1
         b               put_h264_qpel16_mc11
author	Måns Rullgård <mans@mansr.com>	2009-10-11 16:16:08 +0000
committer	Måns Rullgård <mans@mansr.com>	2009-10-11 16:16:08 +0000
commit	0115b3eadb16ff12ba9b10946c518c1471387093 (patch)
tree	ce81f641279391ee3a6b0ae37e3aaafa6e1f1352
parent	e276d9e82d91ad3aed8d8aedab771e7509021cc2 (diff)
download	ffmpeg-0115b3eadb16ff12ba9b10946c518c1471387093.tar.gz