diff options
author | Clément Bœsch <clement@stupeflix.com> | 2016-04-08 13:39:22 +0200 |
---|---|---|
committer | Clément Bœsch <clement@stupeflix.com> | 2016-04-08 17:58:43 +0200 |
commit | cab9661dba472fc913fb43077c7b8dad13410e5e (patch) | |
tree | 433f4afff324c08227d801f20170dba2d86f566c | |
parent | 259879d32d124c60ef868e17e419c7c7c287674a (diff) | |
download | ffmpeg-cab9661dba472fc913fb43077c7b8dad13410e5e.tar.gz |
sws/aarch64/yuv2rgb: honor iOS calling convention
y_offset and y_coeff are consecutive 32-bit integer arguments passed on the
stack, so on iOS they are packed into 8 bytes instead of occupying 2x8 bytes.
See https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
> iOS diverges from Procedure Call Standard for the ARM 64-bit
> Architecture in several ways
[...]
> In the generic procedure call standard, all function arguments passed
> on the stack consume slots in multiples of 8 bytes. In iOS, this
> requirement is dropped, and values consume only the space required.
[...]
> Padding is still inserted on the stack to satisfy arguments’ alignment
> requirements.
-rw-r--r-- | libswscale/aarch64/yuv2rgb_neon.S | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/libswscale/aarch64/yuv2rgb_neon.S b/libswscale/aarch64/yuv2rgb_neon.S
index 8cefe2291b..b7446aa105 100644
--- a/libswscale/aarch64/yuv2rgb_neon.S
+++ b/libswscale/aarch64/yuv2rgb_neon.S
@@ -21,10 +21,18 @@
 
 #include "libavutil/aarch64/asm.S"
 
+.macro load_yoff_ycoeff yoff ycoeff
+#if defined(__APPLE__)
+    ldp w9, w10, [sp, #\yoff]
+#else
+    ldr w9, [sp, #\yoff]
+    ldr w10, [sp, #\ycoeff]
+#endif
+.endm
+
 .macro load_args_nv12
     ldr x8, [sp] // table
-    ldr w9, [sp, #8] // y_offset
-    ldr w10, [sp, #16] // y_coeff
+    load_yoff_ycoeff 8, 16 // y_offset, y_coeff
     ld1 {v1.1D}, [x8]
     dup v0.8H, w10
     dup v3.8H, w9
@@ -42,8 +50,7 @@
     ldr x13, [sp] // srcV
     ldr w14, [sp, #8] // linesizeV
     ldr x8, [sp, #16] // table
-    ldr w9, [sp, #24] // y_offset
-    ldr w10, [sp, #32] // y_coeff
+    load_yoff_ycoeff 24, 32 // y_offset, y_coeff
     ld1 {v1.1D}, [x8]
     dup v0.8H, w10
     dup v3.8H, w9
@@ -59,8 +66,7 @@
     ldr x13, [sp] // srcV
     ldr w14, [sp, #8] // linesizeV
     ldr x8, [sp, #16] // table
-    ldr w9, [sp, #24] // y_offset
-    ldr w10, [sp, #32] // y_coeff
+    load_yoff_ycoeff 24, 32 // y_offset, y_coeff
     ld1 {v1.1D}, [x8]
     dup v0.8H, w10
     dup v3.8H, w9