about | summary | refs | log | tree | commit | diff | stats
path: root/libswscale
diff options
context:
space:
mode:
author: xufuji456 <839789740@qq.com> 2023-11-15 11:53:12 +0800
committer: Martin Storsjö <martin@martin.st> 2023-11-28 15:54:49 +0200
commit: cc86343b960793a822d6c51b58a1a7e3319cb217 (patch)
tree: d37b70fdb4ef8fa8157564e2a5efdd71d39cda87 /libswscale
parent: 67ce690bc6b64623bbe4ec79d4be7cad01cdfbbf (diff)
download: ffmpeg-cc86343b960793a822d6c51b58a1a7e3319cb217.tar.gz
lavc/hevcdsp_qpel_neon: using movi.16b instead of movi.2d
Building iOS platform with arm64, the compiler has a warning:
"instruction movi.2d with immediate #0 may not function correctly on this CPU, converting to movi.16b"

Signed-off-by: xufuji456 <839789740@qq.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libswscale')
-rw-r--r-- libswscale/aarch64/hscale.S 72
1 files changed, 36 insertions, 36 deletions
diff --git a/libswscale/aarch64/hscale.S b/libswscale/aarch64/hscale.S
index b49443c964..b3873fc4b7 100644
--- a/libswscale/aarch64/hscale.S
+++ b/libswscale/aarch64/hscale.S
@@ -50,10 +50,10 @@ function ff_hscale8to15_X8_neon, export=1
add x12, x16, x7 // filter1 = filter0 + filterSize*2
add x13, x12, x7 // filter2 = filter1 + filterSize*2
add x4, x13, x7 // filter3 = filter2 + filterSize*2
- movi v0.2d, #0 // val sum part 1 (for dst[0])
- movi v1.2d, #0 // val sum part 2 (for dst[1])
- movi v2.2d, #0 // val sum part 3 (for dst[2])
- movi v3.2d, #0 // val sum part 4 (for dst[3])
+ movi v0.16b, #0 // val sum part 1 (for dst[0])
+ movi v1.16b, #0 // val sum part 2 (for dst[1])
+ movi v2.16b, #0 // val sum part 3 (for dst[2])
+ movi v3.16b, #0 // val sum part 4 (for dst[3])
add x17, x3, w8, uxtw // srcp + filterPos[0]
add x8, x3, w0, uxtw // srcp + filterPos[1]
add x0, x3, w11, uxtw // srcp + filterPos[2]
@@ -108,10 +108,10 @@ function ff_hscale8to15_X4_neon, export=1
ldp w8, w9, [x5] // filterPos[idx + 0], [idx + 1]
ldp w10, w11, [x5, #8] // filterPos[idx + 2], [idx + 3]
- movi v16.2d, #0 // initialize accumulator for idx + 0
- movi v17.2d, #0 // initialize accumulator for idx + 1
- movi v18.2d, #0 // initialize accumulator for idx + 2
- movi v19.2d, #0 // initialize accumulator for idx + 3
+ movi v16.16b, #0 // initialize accumulator for idx + 0
+ movi v17.16b, #0 // initialize accumulator for idx + 1
+ movi v18.16b, #0 // initialize accumulator for idx + 2
+ movi v19.16b, #0 // initialize accumulator for idx + 3
mov x12, x4 // filter pointer for idx + 0
add x13, x4, x7 // filter pointer for idx + 1
@@ -253,8 +253,8 @@ function ff_hscale8to15_4_neon, export=1
ldp w12, w13, [x5, #16] // filterPos[idx + 4][0..3], [idx + 5][0..3], next iteration
ldp w14, w15, [x5, #24] // filterPos[idx + 6][0..3], [idx + 7][0..3], next iteration
- movi v0.2d, #0 // Clear madd accumulator for idx 0..3
- movi v5.2d, #0 // Clear madd accumulator for idx 4..7
+ movi v0.16b, #0 // Clear madd accumulator for idx 0..3
+ movi v5.16b, #0 // Clear madd accumulator for idx 4..7
ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x4], #64 // load filter idx + 0..7
@@ -299,8 +299,8 @@ function ff_hscale8to15_4_neon, export=1
ld4 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp]
ld4 {v1.8h, v2.8h, v3.8h, v4.8h}, [x4], #64 // load filter idx + 0..7
- movi v0.2d, #0 // Clear madd accumulator for idx 0..3
- movi v5.2d, #0 // Clear madd accumulator for idx 4..7
+ movi v0.16b, #0 // Clear madd accumulator for idx 0..3
+ movi v5.16b, #0 // Clear madd accumulator for idx 4..7
uxtl v16.8h, v16.8b // unsigned extend long, covert src data to 16-bit
uxtl v17.8h, v17.8b // unsigned extend long, covert src data to 16-bit
@@ -499,10 +499,10 @@ function ff_hscale8to19_X8_neon, export=1
ldr w11, [x5], #4 // filterPos[idx + 2]
add x4, x13, x7 // filter3 = filter2 + filterSize*2
ldr w9, [x5], #4 // filterPos[idx + 3]
- movi v0.2d, #0 // val sum part 1 (for dst[0])
- movi v1.2d, #0 // val sum part 2 (for dst[1])
- movi v2.2d, #0 // val sum part 3 (for dst[2])
- movi v3.2d, #0 // val sum part 4 (for dst[3])
+ movi v0.16b, #0 // val sum part 1 (for dst[0])
+ movi v1.16b, #0 // val sum part 2 (for dst[1])
+ movi v2.16b, #0 // val sum part 3 (for dst[2])
+ movi v3.16b, #0 // val sum part 4 (for dst[3])
add x17, x3, w8, uxtw // srcp + filterPos[0]
add x8, x3, w0, uxtw // srcp + filterPos[1]
add x0, x3, w11, uxtw // srcp + filterPos[2]
@@ -560,10 +560,10 @@ function ff_hscale8to19_X4_neon, export=1
ldp w8, w9, [x5]
ldp w10, w11, [x5, #8]
- movi v16.2d, #0 // initialize accumulator for idx + 0
- movi v17.2d, #0 // initialize accumulator for idx + 1
- movi v18.2d, #0 // initialize accumulator for idx + 2
- movi v19.2d, #0 // initialize accumulator for idx + 3
+ movi v16.16b, #0 // initialize accumulator for idx + 0
+ movi v17.16b, #0 // initialize accumulator for idx + 1
+ movi v18.16b, #0 // initialize accumulator for idx + 2
+ movi v19.16b, #0 // initialize accumulator for idx + 3
mov x12, x4 // filter + 0
add x13, x4, x7 // filter + 1
@@ -865,10 +865,10 @@ function ff_hscale16to15_X8_neon_asm, export=1
add x12, x16, x7 // filter1 = filter0 + filterSize*2
add x13, x12, x7 // filter2 = filter1 + filterSize*2
add x4, x13, x7 // filter3 = filter2 + filterSize*2
- movi v0.2d, #0 // val sum part 1 (for dst[0])
- movi v1.2d, #0 // val sum part 2 (for dst[1])
- movi v2.2d, #0 // val sum part 3 (for dst[2])
- movi v3.2d, #0 // val sum part 4 (for dst[3])
+ movi v0.16b, #0 // val sum part 1 (for dst[0])
+ movi v1.16b, #0 // val sum part 2 (for dst[1])
+ movi v2.16b, #0 // val sum part 3 (for dst[2])
+ movi v3.16b, #0 // val sum part 4 (for dst[3])
add x17, x3, w8, uxtw // srcp + filterPos[0]
add x8, x3, w10, uxtw // srcp + filterPos[1]
add x10, x3, w11, uxtw // srcp + filterPos[2]
@@ -945,10 +945,10 @@ function ff_hscale16to15_X4_neon_asm, export=1
ldp w8, w9, [x5]
ldp w10, w11, [x5, #8]
- movi v16.2d, #0 // initialize accumulator for idx + 0
- movi v17.2d, #0 // initialize accumulator for idx + 1
- movi v18.2d, #0 // initialize accumulator for idx + 2
- movi v19.2d, #0 // initialize accumulator for idx + 3
+ movi v16.16b, #0 // initialize accumulator for idx + 0
+ movi v17.16b, #0 // initialize accumulator for idx + 1
+ movi v18.16b, #0 // initialize accumulator for idx + 2
+ movi v19.16b, #0 // initialize accumulator for idx + 3
mov x12, x4 // filter + 0
add x13, x4, x7 // filter + 1
@@ -1270,10 +1270,10 @@ function ff_hscale16to19_X8_neon_asm, export=1
add x13, x12, x7 // filter2 = filter1 + filterSize*2
lsl w10, w10, #1
add x4, x13, x7 // filter3 = filter2 + filterSize*2
- movi v0.2d, #0 // val sum part 1 (for dst[0])
- movi v1.2d, #0 // val sum part 2 (for dst[1])
- movi v2.2d, #0 // val sum part 3 (for dst[2])
- movi v3.2d, #0 // val sum part 4 (for dst[3])
+ movi v0.16b, #0 // val sum part 1 (for dst[0])
+ movi v1.16b, #0 // val sum part 2 (for dst[1])
+ movi v2.16b, #0 // val sum part 3 (for dst[2])
+ movi v3.16b, #0 // val sum part 4 (for dst[3])
add x17, x3, w8, uxtw // srcp + filterPos[0]
add x8, x3, w10, uxtw // srcp + filterPos[1]
add x10, x3, w11, uxtw // srcp + filterPos[2]
@@ -1348,10 +1348,10 @@ function ff_hscale16to19_X4_neon_asm, export=1
ldp w8, w9, [x5]
ldp w10, w11, [x5, #8]
- movi v16.2d, #0 // initialize accumulator for idx + 0
- movi v17.2d, #0 // initialize accumulator for idx + 1
- movi v18.2d, #0 // initialize accumulator for idx + 2
- movi v19.2d, #0 // initialize accumulator for idx + 3
+ movi v16.16b, #0 // initialize accumulator for idx + 0
+ movi v17.16b, #0 // initialize accumulator for idx + 1
+ movi v18.16b, #0 // initialize accumulator for idx + 2
+ movi v19.16b, #0 // initialize accumulator for idx + 3
mov x12, x4 // filter + 0
add x13, x4, x7 // filter + 1