aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Storsjö <martin@martin.st> 2024-10-04 00:30:24 +0300
committer Martin Storsjö <martin@martin.st> 2024-10-09 15:52:51 +0300
commit 77e6293735262b20a86b5047b77991a86cf4e9e1 (patch)
tree 106b9fe7138490f7eaa4bb0c69733ca767a151e3
parent ec9985b54f68f58519848096ee7f6b1476052871 (diff)
download ffmpeg-77e6293735262b20a86b5047b77991a86cf4e9e1.tar.gz
arm: Consistently use proper interworking function returns
Use "bx lr", or "pop {pc}", which do proper mode switching between thumb and arm modes. A plain "mov pc, lr" does not switch from thumb mode to arm mode (while in arm mode, it does switch mode for a thumb caller). This is normally not an issue, as CONFIG_THUMB is only enabled if the C compiler defaults to thumb; but stick to patterns that can do mode switching if needed, for consistency. Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- libswresample/arm/resample.S | 8
-rw-r--r-- libswscale/arm/hscale.S | 3
-rw-r--r-- libswscale/arm/output.S | 3
-rw-r--r-- libswscale/arm/yuv2rgb_neon.S | 3
4 files changed, 7 insertions, 10 deletions
diff --git a/libswresample/arm/resample.S b/libswresample/arm/resample.S
index 3ce7623246..791f4cc016 100644
--- a/libswresample/arm/resample.S
+++ b/libswresample/arm/resample.S
@@ -30,7 +30,7 @@ function ff_resample_common_apply_filter_x4_float_neon, export=1
vpadd.f32 d0, d0, d1 @ pair adding of the 4x32-bit accumulated values
vpadd.f32 d0, d0, d0 @ pair adding of the 4x32-bit accumulator values
vst1.32 {d0[0]}, [r0] @ write accumulator
- mov pc, lr
+ bx lr
endfunc
function ff_resample_common_apply_filter_x8_float_neon, export=1
@@ -46,7 +46,7 @@ function ff_resample_common_apply_filter_x8_float_neon, export=1
vpadd.f32 d0, d0, d1 @ pair adding of the 4x32-bit accumulated values
vpadd.f32 d0, d0, d0 @ pair adding of the 4x32-bit accumulator values
vst1.32 {d0[0]}, [r0] @ write accumulator
- mov pc, lr
+ bx lr
endfunc
function ff_resample_common_apply_filter_x4_s16_neon, export=1
@@ -59,7 +59,7 @@ function ff_resample_common_apply_filter_x4_s16_neon, export=1
vpadd.s32 d0, d0, d1 @ pair adding of the 4x32-bit accumulated values
vpadd.s32 d0, d0, d0 @ pair adding of the 4x32-bit accumulator values
vst1.32 {d0[0]}, [r0] @ write accumulator
- mov pc, lr
+ bx lr
endfunc
function ff_resample_common_apply_filter_x8_s16_neon, export=1
@@ -73,5 +73,5 @@ function ff_resample_common_apply_filter_x8_s16_neon, export=1
vpadd.s32 d0, d0, d1 @ pair adding of the 4x32-bit accumulated values
vpadd.s32 d0, d0, d0 @ pair adding of the 4x32-bit accumulator values
vst1.32 {d0[0]}, [r0] @ write accumulator
- mov pc, lr
+ bx lr
endfunc
diff --git a/libswscale/arm/hscale.S b/libswscale/arm/hscale.S
index dd4d453957..5c3551a0f1 100644
--- a/libswscale/arm/hscale.S
+++ b/libswscale/arm/hscale.S
@@ -65,6 +65,5 @@ function ff_hscale_8_to_15_neon, export=1
subs r2, #2 @ dstW -= 2
bgt 1b @ loop until end of line
vpop {q4-q7}
- pop {r4-r12, lr}
- mov pc, lr
+ pop {r4-r12, pc}
endfunc
diff --git a/libswscale/arm/output.S b/libswscale/arm/output.S
index 70846dee1f..5f10585f81 100644
--- a/libswscale/arm/output.S
+++ b/libswscale/arm/output.S
@@ -73,6 +73,5 @@ function ff_yuv2planeX_8_neon, export=1
subs r4, r4, #8 @ dstW -= 8
bgt 2b @ loop until width is consumed
vpop {q4-q7}
- pop {r4-r12, lr}
- mov pc, lr
+ pop {r4-r12, pc}
endfunc
diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S
index 474465427d..6777d625f9 100644
--- a/libswscale/arm/yuv2rgb_neon.S
+++ b/libswscale/arm/yuv2rgb_neon.S
@@ -262,8 +262,7 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1
increment_and_test_\ifmt
bgt 1b
vpop {q4-q7}
- pop {r4-r12, lr}
- mov pc, lr
+ pop {r4-r12, pc}
endfunc
.endm