aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Clément Bœsch <clement@stupeflix.com> 2016-04-08 13:39:22 +0200
committer Clément Bœsch <clement@stupeflix.com> 2016-04-08 17:58:43 +0200
commit cab9661dba472fc913fb43077c7b8dad13410e5e (patch)
tree 433f4afff324c08227d801f20170dba2d86f566c
parent 259879d32d124c60ef868e17e419c7c7c287674a (diff)
download ffmpeg-cab9661dba472fc913fb43077c7b8dad13410e5e.tar.gz
sws/aarch64/yuv2rgb: honor iOS calling convention
y_offset and y_coeff being successive 32-bit integers, they are packed
into 8 bytes instead of 2x8 bytes.

See https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html

> iOS diverges from Procedure Call Standard for the ARM 64-bit
> Architecture in several ways
[...]
> In the generic procedure call standard, all function arguments passed
> on the stack consume slots in multiples of 8 bytes. In iOS, this
> requirement is dropped, and values consume only the space required.
[...]
> Padding is still inserted on the stack to satisfy arguments’ alignment
> requirements.
-rw-r--r-- libswscale/aarch64/yuv2rgb_neon.S 18
1 files changed, 12 insertions, 6 deletions
diff --git a/libswscale/aarch64/yuv2rgb_neon.S b/libswscale/aarch64/yuv2rgb_neon.S
index 8cefe2291b..b7446aa105 100644
--- a/libswscale/aarch64/yuv2rgb_neon.S
+++ b/libswscale/aarch64/yuv2rgb_neon.S
@@ -21,10 +21,18 @@
#include "libavutil/aarch64/asm.S"
+.macro load_yoff_ycoeff yoff ycoeff // load w9 = y_offset and w10 = y_coeff from the stack; both arguments are byte offsets from sp
+#if defined(__APPLE__) // iOS ABI: stack arguments only take the space they need, so the two 32-bit values are adjacent
+ ldp w9, w10, [sp, #\yoff] // one paired load fetches both words starting at yoff; the ycoeff argument is unused on this path
+#else // generic calling convention: each stack argument has its own 8-byte slot
+ ldr w9, [sp, #\yoff] // w9 = 32-bit word at sp + yoff
+ ldr w10, [sp, #\ycoeff] // w10 = 32-bit word at sp + ycoeff
+#endif
+.endm
+
.macro load_args_nv12
ldr x8, [sp] // table
- ldr w9, [sp, #8] // y_offset
- ldr w10, [sp, #16] // y_coeff
+ load_yoff_ycoeff 8, 16 // y_offset, y_coeff
ld1 {v1.1D}, [x8]
dup v0.8H, w10
dup v3.8H, w9
@@ -42,8 +50,7 @@
ldr x13, [sp] // srcV
ldr w14, [sp, #8] // linesizeV
ldr x8, [sp, #16] // table
- ldr w9, [sp, #24] // y_offset
- ldr w10, [sp, #32] // y_coeff
+ load_yoff_ycoeff 24, 32 // y_offset, y_coeff
ld1 {v1.1D}, [x8]
dup v0.8H, w10
dup v3.8H, w9
@@ -59,8 +66,7 @@
ldr x13, [sp] // srcV
ldr w14, [sp, #8] // linesizeV
ldr x8, [sp, #16] // table
- ldr w9, [sp, #24] // y_offset
- ldr w10, [sp, #32] // y_coeff
+ load_yoff_ycoeff 24, 32 // y_offset, y_coeff
ld1 {v1.1D}, [x8]
dup v0.8H, w10
dup v3.8H, w9