about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/aarch64/h264idct_neon.S
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2023-10-17 14:16:24 +0300
committer: Martin Storsjö <martin@martin.st> 2023-10-21 23:25:18 +0300
commit: 184103b3105f02f1189fa0047af4269e027dfbd6 (patch)
tree: 3e50ad549ed68292f91594c4e6fb26551de90369 /libavcodec/aarch64/h264idct_neon.S
parent: 393d1ee541b143633bfba2ff0e821d734fd511c2 (diff)
download: ffmpeg-184103b3105f02f1189fa0047af4269e027dfbd6.tar.gz
aarch64: Consistently use lowercase for vector element specifiers
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/h264idct_neon.S')
-rw-r--r-- libavcodec/aarch64/h264idct_neon.S 390
1 files changed, 195 insertions, 195 deletions
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 375da31d65..1bab2ca7c8 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -25,54 +25,54 @@
function ff_h264_idct_add_neon, export=1
.L_ff_h264_idct_add_neon:
AARCH64_VALID_CALL_TARGET
- ld1 {v0.4H, v1.4H, v2.4H, v3.4H}, [x1]
+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x1]
sxtw x2, w2
- movi v30.8H, #0
+ movi v30.8h, #0
- add v4.4H, v0.4H, v2.4H
- sshr v16.4H, v1.4H, #1
- st1 {v30.8H}, [x1], #16
- sshr v17.4H, v3.4H, #1
- st1 {v30.8H}, [x1], #16
- sub v5.4H, v0.4H, v2.4H
- sub v6.4H, v16.4H, v3.4H
- add v7.4H, v1.4H, v17.4H
- add v0.4H, v4.4H, v7.4H
- add v1.4H, v5.4H, v6.4H
- sub v2.4H, v5.4H, v6.4H
- sub v3.4H, v4.4H, v7.4H
+ add v4.4h, v0.4h, v2.4h
+ sshr v16.4h, v1.4h, #1
+ st1 {v30.8h}, [x1], #16
+ sshr v17.4h, v3.4h, #1
+ st1 {v30.8h}, [x1], #16
+ sub v5.4h, v0.4h, v2.4h
+ sub v6.4h, v16.4h, v3.4h
+ add v7.4h, v1.4h, v17.4h
+ add v0.4h, v4.4h, v7.4h
+ add v1.4h, v5.4h, v6.4h
+ sub v2.4h, v5.4h, v6.4h
+ sub v3.4h, v4.4h, v7.4h
transpose_4x4H v0, v1, v2, v3, v4, v5, v6, v7
- add v4.4H, v0.4H, v2.4H
- ld1 {v18.S}[0], [x0], x2
- sshr v16.4H, v3.4H, #1
- sshr v17.4H, v1.4H, #1
- ld1 {v18.S}[1], [x0], x2
- sub v5.4H, v0.4H, v2.4H
- ld1 {v19.S}[1], [x0], x2
- add v6.4H, v16.4H, v1.4H
- ins v4.D[1], v5.D[0]
- sub v7.4H, v17.4H, v3.4H
- ld1 {v19.S}[0], [x0], x2
- ins v6.D[1], v7.D[0]
+ add v4.4h, v0.4h, v2.4h
+ ld1 {v18.s}[0], [x0], x2
+ sshr v16.4h, v3.4h, #1
+ sshr v17.4h, v1.4h, #1
+ ld1 {v18.s}[1], [x0], x2
+ sub v5.4h, v0.4h, v2.4h
+ ld1 {v19.s}[1], [x0], x2
+ add v6.4h, v16.4h, v1.4h
+ ins v4.d[1], v5.d[0]
+ sub v7.4h, v17.4h, v3.4h
+ ld1 {v19.s}[0], [x0], x2
+ ins v6.d[1], v7.d[0]
sub x0, x0, x2, lsl #2
- add v0.8H, v4.8H, v6.8H
- sub v1.8H, v4.8H, v6.8H
+ add v0.8h, v4.8h, v6.8h
+ sub v1.8h, v4.8h, v6.8h
- srshr v0.8H, v0.8H, #6
- srshr v1.8H, v1.8H, #6
+ srshr v0.8h, v0.8h, #6
+ srshr v1.8h, v1.8h, #6
- uaddw v0.8H, v0.8H, v18.8B
- uaddw v1.8H, v1.8H, v19.8B
+ uaddw v0.8h, v0.8h, v18.8b
+ uaddw v1.8h, v1.8h, v19.8b
- sqxtun v0.8B, v0.8H
- sqxtun v1.8B, v1.8H
+ sqxtun v0.8b, v0.8h
+ sqxtun v1.8b, v1.8h
- st1 {v0.S}[0], [x0], x2
- st1 {v0.S}[1], [x0], x2
- st1 {v1.S}[1], [x0], x2
- st1 {v1.S}[0], [x0], x2
+ st1 {v0.s}[0], [x0], x2
+ st1 {v0.s}[1], [x0], x2
+ st1 {v1.s}[1], [x0], x2
+ st1 {v1.s}[0], [x0], x2
sub x1, x1, #32
ret
@@ -83,22 +83,22 @@ function ff_h264_idct_dc_add_neon, export=1
AARCH64_VALID_CALL_TARGET
sxtw x2, w2
mov w3, #0
- ld1r {v2.8H}, [x1]
+ ld1r {v2.8h}, [x1]
strh w3, [x1]
- srshr v2.8H, v2.8H, #6
- ld1 {v0.S}[0], [x0], x2
- ld1 {v0.S}[1], [x0], x2
- uaddw v3.8H, v2.8H, v0.8B
- ld1 {v1.S}[0], [x0], x2
- ld1 {v1.S}[1], [x0], x2
- uaddw v4.8H, v2.8H, v1.8B
- sqxtun v0.8B, v3.8H
- sqxtun v1.8B, v4.8H
+ srshr v2.8h, v2.8h, #6
+ ld1 {v0.s}[0], [x0], x2
+ ld1 {v0.s}[1], [x0], x2
+ uaddw v3.8h, v2.8h, v0.8b
+ ld1 {v1.s}[0], [x0], x2
+ ld1 {v1.s}[1], [x0], x2
+ uaddw v4.8h, v2.8h, v1.8b
+ sqxtun v0.8b, v3.8h
+ sqxtun v1.8b, v4.8h
sub x0, x0, x2, lsl #2
- st1 {v0.S}[0], [x0], x2
- st1 {v0.S}[1], [x0], x2
- st1 {v1.S}[0], [x0], x2
- st1 {v1.S}[1], [x0], x2
+ st1 {v0.s}[0], [x0], x2
+ st1 {v0.s}[1], [x0], x2
+ st1 {v1.s}[0], [x0], x2
+ st1 {v1.s}[1], [x0], x2
ret
endfunc
@@ -194,71 +194,71 @@ endfunc
.if \pass == 0
va .req v18
vb .req v30
- sshr v18.8H, v26.8H, #1
- add v16.8H, v24.8H, v28.8H
- ld1 {v30.8H, v31.8H}, [x1]
- st1 {v19.8H}, [x1], #16
- st1 {v19.8H}, [x1], #16
- sub v17.8H, v24.8H, v28.8H
- sshr v19.8H, v30.8H, #1
- sub v18.8H, v18.8H, v30.8H
- add v19.8H, v19.8H, v26.8H
+ sshr v18.8h, v26.8h, #1
+ add v16.8h, v24.8h, v28.8h
+ ld1 {v30.8h, v31.8h}, [x1]
+ st1 {v19.8h}, [x1], #16
+ st1 {v19.8h}, [x1], #16
+ sub v17.8h, v24.8h, v28.8h
+ sshr v19.8h, v30.8h, #1
+ sub v18.8h, v18.8h, v30.8h
+ add v19.8h, v19.8h, v26.8h
.else
va .req v30
vb .req v18
- sshr v30.8H, v26.8H, #1
- sshr v19.8H, v18.8H, #1
- add v16.8H, v24.8H, v28.8H
- sub v17.8H, v24.8H, v28.8H
- sub v30.8H, v30.8H, v18.8H
- add v19.8H, v19.8H, v26.8H
+ sshr v30.8h, v26.8h, #1
+ sshr v19.8h, v18.8h, #1
+ add v16.8h, v24.8h, v28.8h
+ sub v17.8h, v24.8h, v28.8h
+ sub v30.8h, v30.8h, v18.8h
+ add v19.8h, v19.8h, v26.8h
.endif
- add v26.8H, v17.8H, va.8H
- sub v28.8H, v17.8H, va.8H
- add v24.8H, v16.8H, v19.8H
- sub vb.8H, v16.8H, v19.8H
- sub v16.8H, v29.8H, v27.8H
- add v17.8H, v31.8H, v25.8H
- sub va.8H, v31.8H, v25.8H
- add v19.8H, v29.8H, v27.8H
- sub v16.8H, v16.8H, v31.8H
- sub v17.8H, v17.8H, v27.8H
- add va.8H, va.8H, v29.8H
- add v19.8H, v19.8H, v25.8H
- sshr v25.8H, v25.8H, #1
- sshr v27.8H, v27.8H, #1
- sshr v29.8H, v29.8H, #1
- sshr v31.8H, v31.8H, #1
- sub v16.8H, v16.8H, v31.8H
- sub v17.8H, v17.8H, v27.8H
- add va.8H, va.8H, v29.8H
- add v19.8H, v19.8H, v25.8H
- sshr v25.8H, v16.8H, #2
- sshr v27.8H, v17.8H, #2
- sshr v29.8H, va.8H, #2
- sshr v31.8H, v19.8H, #2
- sub v19.8H, v19.8H, v25.8H
- sub va.8H, v27.8H, va.8H
- add v17.8H, v17.8H, v29.8H
- add v16.8H, v16.8H, v31.8H
+ add v26.8h, v17.8h, va.8h
+ sub v28.8h, v17.8h, va.8h
+ add v24.8h, v16.8h, v19.8h
+ sub vb.8h, v16.8h, v19.8h
+ sub v16.8h, v29.8h, v27.8h
+ add v17.8h, v31.8h, v25.8h
+ sub va.8h, v31.8h, v25.8h
+ add v19.8h, v29.8h, v27.8h
+ sub v16.8h, v16.8h, v31.8h
+ sub v17.8h, v17.8h, v27.8h
+ add va.8h, va.8h, v29.8h
+ add v19.8h, v19.8h, v25.8h
+ sshr v25.8h, v25.8h, #1
+ sshr v27.8h, v27.8h, #1
+ sshr v29.8h, v29.8h, #1
+ sshr v31.8h, v31.8h, #1
+ sub v16.8h, v16.8h, v31.8h
+ sub v17.8h, v17.8h, v27.8h
+ add va.8h, va.8h, v29.8h
+ add v19.8h, v19.8h, v25.8h
+ sshr v25.8h, v16.8h, #2
+ sshr v27.8h, v17.8h, #2
+ sshr v29.8h, va.8h, #2
+ sshr v31.8h, v19.8h, #2
+ sub v19.8h, v19.8h, v25.8h
+ sub va.8h, v27.8h, va.8h
+ add v17.8h, v17.8h, v29.8h
+ add v16.8h, v16.8h, v31.8h
.if \pass == 0
- sub v31.8H, v24.8H, v19.8H
- add v24.8H, v24.8H, v19.8H
- add v25.8H, v26.8H, v18.8H
- sub v18.8H, v26.8H, v18.8H
- add v26.8H, v28.8H, v17.8H
- add v27.8H, v30.8H, v16.8H
- sub v29.8H, v28.8H, v17.8H
- sub v28.8H, v30.8H, v16.8H
+ sub v31.8h, v24.8h, v19.8h
+ add v24.8h, v24.8h, v19.8h
+ add v25.8h, v26.8h, v18.8h
+ sub v18.8h, v26.8h, v18.8h
+ add v26.8h, v28.8h, v17.8h
+ add v27.8h, v30.8h, v16.8h
+ sub v29.8h, v28.8h, v17.8h
+ sub v28.8h, v30.8h, v16.8h
.else
- sub v31.8H, v24.8H, v19.8H
- add v24.8H, v24.8H, v19.8H
- add v25.8H, v26.8H, v30.8H
- sub v30.8H, v26.8H, v30.8H
- add v26.8H, v28.8H, v17.8H
- sub v29.8H, v28.8H, v17.8H
- add v27.8H, v18.8H, v16.8H
- sub v28.8H, v18.8H, v16.8H
+ sub v31.8h, v24.8h, v19.8h
+ add v24.8h, v24.8h, v19.8h
+ add v25.8h, v26.8h, v30.8h
+ sub v30.8h, v26.8h, v30.8h
+ add v26.8h, v28.8h, v17.8h
+ sub v29.8h, v28.8h, v17.8h
+ add v27.8h, v18.8h, v16.8h
+ sub v28.8h, v18.8h, v16.8h
.endif
.unreq va
.unreq vb
@@ -267,63 +267,63 @@ endfunc
function ff_h264_idct8_add_neon, export=1
.L_ff_h264_idct8_add_neon:
AARCH64_VALID_CALL_TARGET
- movi v19.8H, #0
+ movi v19.8h, #0
sxtw x2, w2
- ld1 {v24.8H, v25.8H}, [x1]
- st1 {v19.8H}, [x1], #16
- st1 {v19.8H}, [x1], #16
- ld1 {v26.8H, v27.8H}, [x1]
- st1 {v19.8H}, [x1], #16
- st1 {v19.8H}, [x1], #16
- ld1 {v28.8H, v29.8H}, [x1]
- st1 {v19.8H}, [x1], #16
- st1 {v19.8H}, [x1], #16
+ ld1 {v24.8h, v25.8h}, [x1]
+ st1 {v19.8h}, [x1], #16
+ st1 {v19.8h}, [x1], #16
+ ld1 {v26.8h, v27.8h}, [x1]
+ st1 {v19.8h}, [x1], #16
+ st1 {v19.8h}, [x1], #16
+ ld1 {v28.8h, v29.8h}, [x1]
+ st1 {v19.8h}, [x1], #16
+ st1 {v19.8h}, [x1], #16
idct8x8_cols 0
transpose_8x8H v24, v25, v26, v27, v28, v29, v18, v31, v6, v7
idct8x8_cols 1
mov x3, x0
- srshr v24.8H, v24.8H, #6
- ld1 {v0.8B}, [x0], x2
- srshr v25.8H, v25.8H, #6
- ld1 {v1.8B}, [x0], x2
- srshr v26.8H, v26.8H, #6
- ld1 {v2.8B}, [x0], x2
- srshr v27.8H, v27.8H, #6
- ld1 {v3.8B}, [x0], x2
- srshr v28.8H, v28.8H, #6
- ld1 {v4.8B}, [x0], x2
- srshr v29.8H, v29.8H, #6
- ld1 {v5.8B}, [x0], x2
- srshr v30.8H, v30.8H, #6
- ld1 {v6.8B}, [x0], x2
- srshr v31.8H, v31.8H, #6
- ld1 {v7.8B}, [x0], x2
- uaddw v24.8H, v24.8H, v0.8B
- uaddw v25.8H, v25.8H, v1.8B
- uaddw v26.8H, v26.8H, v2.8B
- sqxtun v0.8B, v24.8H
- uaddw v27.8H, v27.8H, v3.8B
- sqxtun v1.8B, v25.8H
- uaddw v28.8H, v28.8H, v4.8B
- sqxtun v2.8B, v26.8H
- st1 {v0.8B}, [x3], x2
- uaddw v29.8H, v29.8H, v5.8B
- sqxtun v3.8B, v27.8H
- st1 {v1.8B}, [x3], x2
- uaddw v30.8H, v30.8H, v6.8B
- sqxtun v4.8B, v28.8H
- st1 {v2.8B}, [x3], x2
- uaddw v31.8H, v31.8H, v7.8B
- sqxtun v5.8B, v29.8H
- st1 {v3.8B}, [x3], x2
- sqxtun v6.8B, v30.8H
- sqxtun v7.8B, v31.8H
- st1 {v4.8B}, [x3], x2
- st1 {v5.8B}, [x3], x2
- st1 {v6.8B}, [x3], x2
- st1 {v7.8B}, [x3], x2
+ srshr v24.8h, v24.8h, #6
+ ld1 {v0.8b}, [x0], x2
+ srshr v25.8h, v25.8h, #6
+ ld1 {v1.8b}, [x0], x2
+ srshr v26.8h, v26.8h, #6
+ ld1 {v2.8b}, [x0], x2
+ srshr v27.8h, v27.8h, #6
+ ld1 {v3.8b}, [x0], x2
+ srshr v28.8h, v28.8h, #6
+ ld1 {v4.8b}, [x0], x2
+ srshr v29.8h, v29.8h, #6
+ ld1 {v5.8b}, [x0], x2
+ srshr v30.8h, v30.8h, #6
+ ld1 {v6.8b}, [x0], x2
+ srshr v31.8h, v31.8h, #6
+ ld1 {v7.8b}, [x0], x2
+ uaddw v24.8h, v24.8h, v0.8b
+ uaddw v25.8h, v25.8h, v1.8b
+ uaddw v26.8h, v26.8h, v2.8b
+ sqxtun v0.8b, v24.8h
+ uaddw v27.8h, v27.8h, v3.8b
+ sqxtun v1.8b, v25.8h
+ uaddw v28.8h, v28.8h, v4.8b
+ sqxtun v2.8b, v26.8h
+ st1 {v0.8b}, [x3], x2
+ uaddw v29.8h, v29.8h, v5.8b
+ sqxtun v3.8b, v27.8h
+ st1 {v1.8b}, [x3], x2
+ uaddw v30.8h, v30.8h, v6.8b
+ sqxtun v4.8b, v28.8h
+ st1 {v2.8b}, [x3], x2
+ uaddw v31.8h, v31.8h, v7.8b
+ sqxtun v5.8b, v29.8h
+ st1 {v3.8b}, [x3], x2
+ sqxtun v6.8b, v30.8h
+ sqxtun v7.8b, v31.8h
+ st1 {v4.8b}, [x3], x2
+ st1 {v5.8b}, [x3], x2
+ st1 {v6.8b}, [x3], x2
+ st1 {v7.8b}, [x3], x2
sub x1, x1, #128
ret
@@ -334,42 +334,42 @@ function ff_h264_idct8_dc_add_neon, export=1
AARCH64_VALID_CALL_TARGET
mov w3, #0
sxtw x2, w2
- ld1r {v31.8H}, [x1]
+ ld1r {v31.8h}, [x1]
strh w3, [x1]
- ld1 {v0.8B}, [x0], x2
- srshr v31.8H, v31.8H, #6
- ld1 {v1.8B}, [x0], x2
- ld1 {v2.8B}, [x0], x2
- uaddw v24.8H, v31.8H, v0.8B
- ld1 {v3.8B}, [x0], x2
- uaddw v25.8H, v31.8H, v1.8B
- ld1 {v4.8B}, [x0], x2
- uaddw v26.8H, v31.8H, v2.8B
- ld1 {v5.8B}, [x0], x2
- uaddw v27.8H, v31.8H, v3.8B
- ld1 {v6.8B}, [x0], x2
- uaddw v28.8H, v31.8H, v4.8B
- ld1 {v7.8B}, [x0], x2
- uaddw v29.8H, v31.8H, v5.8B
- uaddw v30.8H, v31.8H, v6.8B
- uaddw v31.8H, v31.8H, v7.8B
- sqxtun v0.8B, v24.8H
- sqxtun v1.8B, v25.8H
- sqxtun v2.8B, v26.8H
- sqxtun v3.8B, v27.8H
+ ld1 {v0.8b}, [x0], x2
+ srshr v31.8h, v31.8h, #6
+ ld1 {v1.8b}, [x0], x2
+ ld1 {v2.8b}, [x0], x2
+ uaddw v24.8h, v31.8h, v0.8b
+ ld1 {v3.8b}, [x0], x2
+ uaddw v25.8h, v31.8h, v1.8b
+ ld1 {v4.8b}, [x0], x2
+ uaddw v26.8h, v31.8h, v2.8b
+ ld1 {v5.8b}, [x0], x2
+ uaddw v27.8h, v31.8h, v3.8b
+ ld1 {v6.8b}, [x0], x2
+ uaddw v28.8h, v31.8h, v4.8b
+ ld1 {v7.8b}, [x0], x2
+ uaddw v29.8h, v31.8h, v5.8b
+ uaddw v30.8h, v31.8h, v6.8b
+ uaddw v31.8h, v31.8h, v7.8b
+ sqxtun v0.8b, v24.8h
+ sqxtun v1.8b, v25.8h
+ sqxtun v2.8b, v26.8h
+ sqxtun v3.8b, v27.8h
sub x0, x0, x2, lsl #3
- st1 {v0.8B}, [x0], x2
- sqxtun v4.8B, v28.8H
- st1 {v1.8B}, [x0], x2
- sqxtun v5.8B, v29.8H
- st1 {v2.8B}, [x0], x2
- sqxtun v6.8B, v30.8H
- st1 {v3.8B}, [x0], x2
- sqxtun v7.8B, v31.8H
- st1 {v4.8B}, [x0], x2
- st1 {v5.8B}, [x0], x2
- st1 {v6.8B}, [x0], x2
- st1 {v7.8B}, [x0], x2
+ st1 {v0.8b}, [x0], x2
+ sqxtun v4.8b, v28.8h
+ st1 {v1.8b}, [x0], x2
+ sqxtun v5.8b, v29.8h
+ st1 {v2.8b}, [x0], x2
+ sqxtun v6.8b, v30.8h
+ st1 {v3.8b}, [x0], x2
+ sqxtun v7.8b, v31.8h
+ st1 {v4.8b}, [x0], x2
+ st1 {v5.8b}, [x0], x2
+ st1 {v6.8b}, [x0], x2
+ st1 {v7.8b}, [x0], x2
ret
endfunc