diff options
author | Martin Storsjö <martin@martin.st> | 2023-10-17 14:16:24 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2023-10-21 23:25:18 +0300 |
commit | 184103b3105f02f1189fa0047af4269e027dfbd6 (patch) | |
tree | 3e50ad549ed68292f91594c4e6fb26551de90369 /libavcodec/aarch64/h264idct_neon.S | |
parent | 393d1ee541b143633bfba2ff0e821d734fd511c2 (diff) | |
download | ffmpeg-184103b3105f02f1189fa0047af4269e027dfbd6.tar.gz |
aarch64: Consistently use lowercase for vector element specifiers
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/h264idct_neon.S')
-rw-r--r-- | libavcodec/aarch64/h264idct_neon.S | 390 |
1 files changed, 195 insertions, 195 deletions
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S index 375da31d65..1bab2ca7c8 100644 --- a/libavcodec/aarch64/h264idct_neon.S +++ b/libavcodec/aarch64/h264idct_neon.S @@ -25,54 +25,54 @@ function ff_h264_idct_add_neon, export=1 .L_ff_h264_idct_add_neon: AARCH64_VALID_CALL_TARGET - ld1 {v0.4H, v1.4H, v2.4H, v3.4H}, [x1] + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x1] sxtw x2, w2 - movi v30.8H, #0 + movi v30.8h, #0 - add v4.4H, v0.4H, v2.4H - sshr v16.4H, v1.4H, #1 - st1 {v30.8H}, [x1], #16 - sshr v17.4H, v3.4H, #1 - st1 {v30.8H}, [x1], #16 - sub v5.4H, v0.4H, v2.4H - sub v6.4H, v16.4H, v3.4H - add v7.4H, v1.4H, v17.4H - add v0.4H, v4.4H, v7.4H - add v1.4H, v5.4H, v6.4H - sub v2.4H, v5.4H, v6.4H - sub v3.4H, v4.4H, v7.4H + add v4.4h, v0.4h, v2.4h + sshr v16.4h, v1.4h, #1 + st1 {v30.8h}, [x1], #16 + sshr v17.4h, v3.4h, #1 + st1 {v30.8h}, [x1], #16 + sub v5.4h, v0.4h, v2.4h + sub v6.4h, v16.4h, v3.4h + add v7.4h, v1.4h, v17.4h + add v0.4h, v4.4h, v7.4h + add v1.4h, v5.4h, v6.4h + sub v2.4h, v5.4h, v6.4h + sub v3.4h, v4.4h, v7.4h transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7 - add v4.4H, v0.4H, v2.4H - ld1 {v18.S}[0], [x0], x2 - sshr v16.4H, v3.4H, #1 - sshr v17.4H, v1.4H, #1 - ld1 {v18.S}[1], [x0], x2 - sub v5.4H, v0.4H, v2.4H - ld1 {v19.S}[1], [x0], x2 - add v6.4H, v16.4H, v1.4H - ins v4.D[1], v5.D[0] - sub v7.4H, v17.4H, v3.4H - ld1 {v19.S}[0], [x0], x2 - ins v6.D[1], v7.D[0] + add v4.4h, v0.4h, v2.4h + ld1 {v18.s}[0], [x0], x2 + sshr v16.4h, v3.4h, #1 + sshr v17.4h, v1.4h, #1 + ld1 {v18.s}[1], [x0], x2 + sub v5.4h, v0.4h, v2.4h + ld1 {v19.s}[1], [x0], x2 + add v6.4h, v16.4h, v1.4h + ins v4.d[1], v5.d[0] + sub v7.4h, v17.4h, v3.4h + ld1 {v19.s}[0], [x0], x2 + ins v6.d[1], v7.d[0] sub x0, x0, x2, lsl #2 - add v0.8H, v4.8H, v6.8H - sub v1.8H, v4.8H, v6.8H + add v0.8h, v4.8h, v6.8h + sub v1.8h, v4.8h, v6.8h - srshr v0.8H, v0.8H, #6 - srshr v1.8H, v1.8H, #6 + srshr v0.8h, v0.8h, #6 + srshr v1.8h, v1.8h, #6 - uaddw v0.8H, v0.8H, v18.8B - uaddw v1.8H, v1.8H, v19.8B + uaddw v0.8h, v0.8h, v18.8b + uaddw v1.8h, v1.8h, v19.8b - sqxtun v0.8B, v0.8H - sqxtun v1.8B, v1.8H + sqxtun v0.8b, v0.8h + sqxtun v1.8b, v1.8h - st1 {v0.S}[0], [x0], x2 - st1 {v0.S}[1], [x0], x2 - st1 {v1.S}[1], [x0], x2 - st1 {v1.S}[0], [x0], x2 + st1 {v0.s}[0], [x0], x2 + st1 {v0.s}[1], [x0], x2 + st1 {v1.s}[1], [x0], x2 + st1 {v1.s}[0], [x0], x2 sub x1, x1, #32 ret @@ -83,22 +83,22 @@ function ff_h264_idct_dc_add_neon, export=1 AARCH64_VALID_CALL_TARGET sxtw x2, w2 mov w3, #0 - ld1r {v2.8H}, [x1] + ld1r {v2.8h}, [x1] strh w3, [x1] - srshr v2.8H, v2.8H, #6 - ld1 {v0.S}[0], [x0], x2 - ld1 {v0.S}[1], [x0], x2 - uaddw v3.8H, v2.8H, v0.8B - ld1 {v1.S}[0], [x0], x2 - ld1 {v1.S}[1], [x0], x2 - uaddw v4.8H, v2.8H, v1.8B - sqxtun v0.8B, v3.8H - sqxtun v1.8B, v4.8H + srshr v2.8h, v2.8h, #6 + ld1 {v0.s}[0], [x0], x2 + ld1 {v0.s}[1], [x0], x2 + uaddw v3.8h, v2.8h, v0.8b + ld1 {v1.s}[0], [x0], x2 + ld1 {v1.s}[1], [x0], x2 + uaddw v4.8h, v2.8h, v1.8b + sqxtun v0.8b, v3.8h + sqxtun v1.8b, v4.8h sub x0, x0, x2, lsl #2 - st1 {v0.S}[0], [x0], x2 - st1 {v0.S}[1], [x0], x2 - st1 {v1.S}[0], [x0], x2 - st1 {v1.S}[1], [x0], x2 + st1 {v0.s}[0], [x0], x2 + st1 {v0.s}[1], [x0], x2 + st1 {v1.s}[0], [x0], x2 + st1 {v1.s}[1], [x0], x2 ret endfunc @@ -194,71 +194,71 @@ endfunc .if \pass == 0 va .req v18 vb .req v30 - sshr v18.8H, v26.8H, #1 - add v16.8H, v24.8H, v28.8H - ld1 {v30.8H, v31.8H}, [x1] - st1 {v19.8H}, [x1], #16 - st1 {v19.8H}, [x1], #16 - sub v17.8H, v24.8H, v28.8H - sshr v19.8H, v30.8H, #1 - sub v18.8H, v18.8H, v30.8H - add v19.8H, v19.8H, v26.8H + sshr v18.8h, v26.8h, #1 + add v16.8h, v24.8h, v28.8h + ld1 {v30.8h, v31.8h}, [x1] + st1 {v19.8h}, [x1], #16 + st1 {v19.8h}, [x1], #16 + sub v17.8h, v24.8h, v28.8h + sshr v19.8h, v30.8h, #1 + sub v18.8h, v18.8h, v30.8h + add v19.8h, v19.8h, v26.8h .else va .req v30 vb .req v18 - sshr v30.8H, v26.8H, #1 - sshr v19.8H, v18.8H, #1 - add v16.8H, v24.8H, v28.8H - sub v17.8H, v24.8H, v28.8H - sub v30.8H, v30.8H, v18.8H - add v19.8H, v19.8H, v26.8H + sshr v30.8h, v26.8h, #1 + sshr v19.8h, v18.8h, #1 + add v16.8h, v24.8h, v28.8h + sub v17.8h, v24.8h, v28.8h + sub v30.8h, v30.8h, v18.8h + add v19.8h, v19.8h, v26.8h .endif - add v26.8H, v17.8H, va.8H - sub v28.8H, v17.8H, va.8H - add v24.8H, v16.8H, v19.8H - sub vb.8H, v16.8H, v19.8H - sub v16.8H, v29.8H, v27.8H - add v17.8H, v31.8H, v25.8H - sub va.8H, v31.8H, v25.8H - add v19.8H, v29.8H, v27.8H - sub v16.8H, v16.8H, v31.8H - sub v17.8H, v17.8H, v27.8H - add va.8H, va.8H, v29.8H - add v19.8H, v19.8H, v25.8H - sshr v25.8H, v25.8H, #1 - sshr v27.8H, v27.8H, #1 - sshr v29.8H, v29.8H, #1 - sshr v31.8H, v31.8H, #1 - sub v16.8H, v16.8H, v31.8H - sub v17.8H, v17.8H, v27.8H - add va.8H, va.8H, v29.8H - add v19.8H, v19.8H, v25.8H - sshr v25.8H, v16.8H, #2 - sshr v27.8H, v17.8H, #2 - sshr v29.8H, va.8H, #2 - sshr v31.8H, v19.8H, #2 - sub v19.8H, v19.8H, v25.8H - sub va.8H, v27.8H, va.8H - add v17.8H, v17.8H, v29.8H - add v16.8H, v16.8H, v31.8H + add v26.8h, v17.8h, va.8h + sub v28.8h, v17.8h, va.8h + add v24.8h, v16.8h, v19.8h + sub vb.8h, v16.8h, v19.8h + sub v16.8h, v29.8h, v27.8h + add v17.8h, v31.8h, v25.8h + sub va.8h, v31.8h, v25.8h + add v19.8h, v29.8h, v27.8h + sub v16.8h, v16.8h, v31.8h + sub v17.8h, v17.8h, v27.8h + add va.8h, va.8h, v29.8h + add v19.8h, v19.8h, v25.8h + sshr v25.8h, v25.8h, #1 + sshr v27.8h, v27.8h, #1 + sshr v29.8h, v29.8h, #1 + sshr v31.8h, v31.8h, #1 + sub v16.8h, v16.8h, v31.8h + sub v17.8h, v17.8h, v27.8h + add va.8h, va.8h, v29.8h + add v19.8h, v19.8h, v25.8h + sshr v25.8h, v16.8h, #2 + sshr v27.8h, v17.8h, #2 + sshr v29.8h, va.8h, #2 + sshr v31.8h, v19.8h, #2 + sub v19.8h, v19.8h, v25.8h + sub va.8h, v27.8h, va.8h + add v17.8h, v17.8h, v29.8h + add v16.8h, v16.8h, v31.8h .if \pass == 0 - sub v31.8H, v24.8H, v19.8H - add v24.8H, v24.8H, v19.8H - add v25.8H, v26.8H, v18.8H - sub v18.8H, v26.8H, v18.8H - add v26.8H, v28.8H, v17.8H - add v27.8H, v30.8H, v16.8H - sub v29.8H, v28.8H, v17.8H - sub v28.8H, v30.8H, v16.8H + sub v31.8h, v24.8h, v19.8h + add v24.8h, v24.8h, v19.8h + add v25.8h, v26.8h, v18.8h + sub v18.8h, v26.8h, v18.8h + add v26.8h, v28.8h, v17.8h + add v27.8h, v30.8h, v16.8h + sub v29.8h, v28.8h, v17.8h + sub v28.8h, v30.8h, v16.8h .else - sub v31.8H, v24.8H, v19.8H - add v24.8H, v24.8H, v19.8H - add v25.8H, v26.8H, v30.8H - sub v30.8H, v26.8H, v30.8H - add v26.8H, v28.8H, v17.8H - sub v29.8H, v28.8H, v17.8H - add v27.8H, v18.8H, v16.8H - sub v28.8H, v18.8H, v16.8H + sub v31.8h, v24.8h, v19.8h + add v24.8h, v24.8h, v19.8h + add v25.8h, v26.8h, v30.8h + sub v30.8h, v26.8h, v30.8h + add v26.8h, v28.8h, v17.8h + sub v29.8h, v28.8h, v17.8h + add v27.8h, v18.8h, v16.8h + sub v28.8h, v18.8h, v16.8h .endif .unreq va .unreq vb @@ -267,63 +267,63 @@ endfunc function ff_h264_idct8_add_neon, export=1 .L_ff_h264_idct8_add_neon: AARCH64_VALID_CALL_TARGET - movi v19.8H, #0 + movi v19.8h, #0 sxtw x2, w2 - ld1 {v24.8H, v25.8H}, [x1] - st1 {v19.8H}, [x1], #16 - st1 {v19.8H}, [x1], #16 - ld1 {v26.8H, v27.8H}, [x1] - st1 {v19.8H}, [x1], #16 - st1 {v19.8H}, [x1], #16 - ld1 {v28.8H, v29.8H}, [x1] - st1 {v19.8H}, [x1], #16 - st1 {v19.8H}, [x1], #16 + ld1 {v24.8h, v25.8h}, [x1] + st1 {v19.8h}, [x1], #16 + st1 {v19.8h}, [x1], #16 + ld1 {v26.8h, v27.8h}, [x1] + st1 {v19.8h}, [x1], #16 + st1 {v19.8h}, [x1], #16 + ld1 {v28.8h, v29.8h}, [x1] + st1 {v19.8h}, [x1], #16 + st1 {v19.8h}, [x1], #16 idct8x8_cols 0 transpose_8x8H v24, v25, v26, v27, v28, v29, v18, v31, v6, v7 idct8x8_cols 1 mov x3, x0 - srshr v24.8H, v24.8H, #6 - ld1 {v0.8B}, [x0], x2 - srshr v25.8H, v25.8H, #6 - ld1 {v1.8B}, [x0], x2 - srshr v26.8H, v26.8H, #6 - ld1 {v2.8B}, [x0], x2 - srshr v27.8H, v27.8H, #6 - ld1 {v3.8B}, [x0], x2 - srshr v28.8H, v28.8H, #6 - ld1 {v4.8B}, [x0], x2 - srshr v29.8H, v29.8H, #6 - ld1 {v5.8B}, [x0], x2 - srshr v30.8H, v30.8H, #6 - ld1 {v6.8B}, [x0], x2 - srshr v31.8H, v31.8H, #6 - ld1 {v7.8B}, [x0], x2 - uaddw v24.8H, v24.8H, v0.8B - uaddw v25.8H, v25.8H, v1.8B - uaddw v26.8H, v26.8H, v2.8B - sqxtun v0.8B, v24.8H - uaddw v27.8H, v27.8H, v3.8B - sqxtun v1.8B, v25.8H - uaddw v28.8H, v28.8H, v4.8B - sqxtun v2.8B, v26.8H - st1 {v0.8B}, [x3], x2 - uaddw v29.8H, v29.8H, v5.8B - sqxtun v3.8B, v27.8H - st1 {v1.8B}, [x3], x2 - uaddw v30.8H, v30.8H, v6.8B - sqxtun v4.8B, v28.8H - st1 {v2.8B}, [x3], x2 - uaddw v31.8H, v31.8H, v7.8B - sqxtun v5.8B, v29.8H - st1 {v3.8B}, [x3], x2 - sqxtun v6.8B, v30.8H - sqxtun v7.8B, v31.8H - st1 {v4.8B}, [x3], x2 - st1 {v5.8B}, [x3], x2 - st1 {v6.8B}, [x3], x2 - st1 {v7.8B}, [x3], x2 + srshr v24.8h, v24.8h, #6 + ld1 {v0.8b}, [x0], x2 + srshr v25.8h, v25.8h, #6 + ld1 {v1.8b}, [x0], x2 + srshr v26.8h, v26.8h, #6 + ld1 {v2.8b}, [x0], x2 + srshr v27.8h, v27.8h, #6 + ld1 {v3.8b}, [x0], x2 + srshr v28.8h, v28.8h, #6 + ld1 {v4.8b}, [x0], x2 + srshr v29.8h, v29.8h, #6 + ld1 {v5.8b}, [x0], x2 + srshr v30.8h, v30.8h, #6 + ld1 {v6.8b}, [x0], x2 + srshr v31.8h, v31.8h, #6 + ld1 {v7.8b}, [x0], x2 + uaddw v24.8h, v24.8h, v0.8b + uaddw v25.8h, v25.8h, v1.8b + uaddw v26.8h, v26.8h, v2.8b + sqxtun v0.8b, v24.8h + uaddw v27.8h, v27.8h, v3.8b + sqxtun v1.8b, v25.8h + uaddw v28.8h, v28.8h, v4.8b + sqxtun v2.8b, v26.8h + st1 {v0.8b}, [x3], x2 + uaddw v29.8h, v29.8h, v5.8b + sqxtun v3.8b, v27.8h + st1 {v1.8b}, [x3], x2 + uaddw v30.8h, v30.8h, v6.8b + sqxtun v4.8b, v28.8h + st1 {v2.8b}, [x3], x2 + uaddw v31.8h, v31.8h, v7.8b + sqxtun v5.8b, v29.8h + st1 {v3.8b}, [x3], x2 + sqxtun v6.8b, v30.8h + sqxtun v7.8b, v31.8h + st1 {v4.8b}, [x3], x2 + st1 {v5.8b}, [x3], x2 + st1 {v6.8b}, [x3], x2 + st1 {v7.8b}, [x3], x2 sub x1, x1, #128 ret @@ -334,42 +334,42 @@ function ff_h264_idct8_dc_add_neon, export=1 AARCH64_VALID_CALL_TARGET mov w3, #0 sxtw x2, w2 - ld1r {v31.8H}, [x1] + ld1r {v31.8h}, [x1] strh w3, [x1] - ld1 {v0.8B}, [x0], x2 - srshr v31.8H, v31.8H, #6 - ld1 {v1.8B}, [x0], x2 - ld1 {v2.8B}, [x0], x2 - uaddw v24.8H, v31.8H, v0.8B - ld1 {v3.8B}, [x0], x2 - uaddw v25.8H, v31.8H, v1.8B - ld1 {v4.8B}, [x0], x2 - uaddw v26.8H, v31.8H, v2.8B - ld1 {v5.8B}, [x0], x2 - uaddw v27.8H, v31.8H, v3.8B - ld1 {v6.8B}, [x0], x2 - uaddw v28.8H, v31.8H, v4.8B - ld1 {v7.8B}, [x0], x2 - uaddw v29.8H, v31.8H, v5.8B - uaddw v30.8H, v31.8H, v6.8B - uaddw v31.8H, v31.8H, v7.8B - sqxtun v0.8B, v24.8H - sqxtun v1.8B, v25.8H - sqxtun v2.8B, v26.8H - sqxtun v3.8B, v27.8H + ld1 {v0.8b}, [x0], x2 + srshr v31.8h, v31.8h, #6 + ld1 {v1.8b}, [x0], x2 + ld1 {v2.8b}, [x0], x2 + uaddw v24.8h, v31.8h, v0.8b + ld1 {v3.8b}, [x0], x2 + uaddw v25.8h, v31.8h, v1.8b + ld1 {v4.8b}, [x0], x2 + uaddw v26.8h, v31.8h, v2.8b + ld1 {v5.8b}, [x0], x2 + uaddw v27.8h, v31.8h, v3.8b + ld1 {v6.8b}, [x0], x2 + uaddw v28.8h, v31.8h, v4.8b + ld1 {v7.8b}, [x0], x2 + uaddw v29.8h, v31.8h, v5.8b + uaddw v30.8h, v31.8h, v6.8b + uaddw v31.8h, v31.8h, v7.8b + sqxtun v0.8b, v24.8h + sqxtun v1.8b, v25.8h + sqxtun v2.8b, v26.8h + sqxtun v3.8b, v27.8h sub x0, x0, x2, lsl #3 - st1 {v0.8B}, [x0], x2 - sqxtun v4.8B, v28.8H - st1 {v1.8B}, [x0], x2 - sqxtun v5.8B, v29.8H - st1 {v2.8B}, [x0], x2 - sqxtun v6.8B, v30.8H - st1 {v3.8B}, [x0], x2 - sqxtun v7.8B, v31.8H - st1 {v4.8B}, [x0], x2 - st1 {v5.8B}, [x0], x2 - st1 {v6.8B}, [x0], x2 - st1 {v7.8B}, [x0], x2 + st1 {v0.8b}, [x0], x2 + sqxtun v4.8b, v28.8h + st1 {v1.8b}, [x0], x2 + sqxtun v5.8b, v29.8h + st1 {v2.8b}, [x0], x2 + sqxtun v6.8b, v30.8h + st1 {v3.8b}, [x0], x2 + sqxtun v7.8b, v31.8h + st1 {v4.8b}, [x0], x2 + st1 {v5.8b}, [x0], x2 + st1 {v6.8b}, [x0], x2 + st1 {v7.8b}, [x0], x2 ret endfunc |