diff options
author | Martin Storsjö <martin@martin.st> | 2023-10-17 14:16:24 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2023-10-21 23:25:18 +0300 |
commit | 184103b3105f02f1189fa0047af4269e027dfbd6 (patch) | |
tree | 3e50ad549ed68292f91594c4e6fb26551de90369 /libavcodec/aarch64/sbrdsp_neon.S | |
parent | 393d1ee541b143633bfba2ff0e821d734fd511c2 (diff) | |
download | ffmpeg-184103b3105f02f1189fa0047af4269e027dfbd6.tar.gz |
aarch64: Consistently use lowercase for vector element specifiers
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/sbrdsp_neon.S')
-rw-r--r-- | libavcodec/aarch64/sbrdsp_neon.S | 294 |
1 files changed, 147 insertions, 147 deletions
diff --git a/libavcodec/aarch64/sbrdsp_neon.S b/libavcodec/aarch64/sbrdsp_neon.S index d23717e760..1fdde6ccb6 100644 --- a/libavcodec/aarch64/sbrdsp_neon.S +++ b/libavcodec/aarch64/sbrdsp_neon.S @@ -46,49 +46,49 @@ function ff_sbr_sum64x5_neon, export=1 add x3, x0, #192*4 add x4, x0, #256*4 mov x5, #64 -1: ld1 {v0.4S}, [x0] - ld1 {v1.4S}, [x1], #16 - fadd v0.4S, v0.4S, v1.4S - ld1 {v2.4S}, [x2], #16 - fadd v0.4S, v0.4S, v2.4S - ld1 {v3.4S}, [x3], #16 - fadd v0.4S, v0.4S, v3.4S - ld1 {v4.4S}, [x4], #16 - fadd v0.4S, v0.4S, v4.4S - st1 {v0.4S}, [x0], #16 +1: ld1 {v0.4s}, [x0] + ld1 {v1.4s}, [x1], #16 + fadd v0.4s, v0.4s, v1.4s + ld1 {v2.4s}, [x2], #16 + fadd v0.4s, v0.4s, v2.4s + ld1 {v3.4s}, [x3], #16 + fadd v0.4s, v0.4s, v3.4s + ld1 {v4.4s}, [x4], #16 + fadd v0.4s, v0.4s, v4.4s + st1 {v0.4s}, [x0], #16 subs x5, x5, #4 b.gt 1b ret endfunc function ff_sbr_sum_square_neon, export=1 - movi v0.4S, #0 -1: ld1 {v1.4S}, [x0], #16 - fmla v0.4S, v1.4S, v1.4S + movi v0.4s, #0 +1: ld1 {v1.4s}, [x0], #16 + fmla v0.4s, v1.4s, v1.4s subs w1, w1, #2 b.gt 1b - faddp v0.4S, v0.4S, v0.4S - faddp v0.4S, v0.4S, v0.4S + faddp v0.4s, v0.4s, v0.4s + faddp v0.4s, v0.4s, v0.4s ret endfunc function ff_sbr_neg_odd_64_neon, export=1 mov x1, x0 - movi v5.4S, #1<<7, lsl #24 - ld2 {v0.4S, v1.4S}, [x0], #32 - eor v1.16B, v1.16B, v5.16B - ld2 {v2.4S, v3.4S}, [x0], #32 + movi v5.4s, #1<<7, lsl #24 + ld2 {v0.4s, v1.4s}, [x0], #32 + eor v1.16b, v1.16b, v5.16b + ld2 {v2.4s, v3.4s}, [x0], #32 .rept 3 - st2 {v0.4S, v1.4S}, [x1], #32 - eor v3.16B, v3.16B, v5.16B - ld2 {v0.4S, v1.4S}, [x0], #32 - st2 {v2.4S, v3.4S}, [x1], #32 - eor v1.16B, v1.16B, v5.16B - ld2 {v2.4S, v3.4S}, [x0], #32 + st2 {v0.4s, v1.4s}, [x1], #32 + eor v3.16b, v3.16b, v5.16b + ld2 {v0.4s, v1.4s}, [x0], #32 + st2 {v2.4s, v3.4s}, [x1], #32 + eor v1.16b, v1.16b, v5.16b + ld2 {v2.4s, v3.4s}, [x0], #32 .endr - eor v3.16B, v3.16B, v5.16B - st2 {v0.4S, v1.4S}, [x1], #32 - st2 {v2.4S, v3.4S}, [x1], #32 + eor v3.16b, v3.16b, v5.16b + st2 {v0.4s, v1.4s}, [x1], #32 + st2 {v2.4s, v3.4s}, [x1], #32 ret endfunc @@ -97,26 +97,26 @@ function ff_sbr_qmf_pre_shuffle_neon, export=1 add x2, x0, #64*4 mov x3, #-16 mov x4, #-4 - movi v6.4S, #1<<7, lsl #24 - ld1 {v0.2S}, [x0], #8 - st1 {v0.2S}, [x2], #8 + movi v6.4s, #1<<7, lsl #24 + ld1 {v0.2s}, [x0], #8 + st1 {v0.2s}, [x2], #8 .rept 7 - ld1 {v1.4S}, [x1], x3 - ld1 {v2.4S}, [x0], #16 - eor v1.16B, v1.16B, v6.16B - rev64 v1.4S, v1.4S - ext v1.16B, v1.16B, v1.16B, #8 - st2 {v1.4S, v2.4S}, [x2], #32 + ld1 {v1.4s}, [x1], x3 + ld1 {v2.4s}, [x0], #16 + eor v1.16b, v1.16b, v6.16b + rev64 v1.4s, v1.4s + ext v1.16b, v1.16b, v1.16b, #8 + st2 {v1.4s, v2.4s}, [x2], #32 .endr add x1, x1, #8 - ld1 {v1.2S}, [x1], x4 - ld1 {v2.2S}, [x0], #8 - ld1 {v1.S}[3], [x1] - ld1 {v2.S}[2], [x0] - eor v1.16B, v1.16B, v6.16B - rev64 v1.4S, v1.4S - st2 {v1.2S, v2.2S}, [x2], #16 - st2 {v1.S, v2.S}[2], [x2] + ld1 {v1.2s}, [x1], x4 + ld1 {v2.2s}, [x0], #8 + ld1 {v1.s}[3], [x1] + ld1 {v2.s}[2], [x0] + eor v1.16b, v1.16b, v6.16b + rev64 v1.4s, v1.4s + st2 {v1.2s, v2.2s}, [x2], #16 + st2 {v1.s, v2.s}[2], [x2] ret endfunc @@ -124,13 +124,13 @@ function ff_sbr_qmf_post_shuffle_neon, export=1 add x2, x1, #60*4 mov x3, #-16 mov x4, #32 - movi v6.4S, #1<<7, lsl #24 -1: ld1 {v0.4S}, [x2], x3 - ld1 {v1.4S}, [x1], #16 - eor v0.16B, v0.16B, v6.16B - rev64 v0.4S, v0.4S - ext v0.16B, v0.16B, v0.16B, #8 - st2 {v0.4S, v1.4S}, [x0], #32 + movi v6.4s, #1<<7, lsl #24 +1: ld1 {v0.4s}, [x2], x3 + ld1 {v1.4s}, [x1], #16 + eor v0.16b, v0.16b, v6.16b + rev64 v0.4s, v0.4s + ext v0.16b, v0.16b, v0.16b, #8 + st2 {v0.4s, v1.4s}, [x0], #32 subs x4, x4, #4 b.gt 1b ret @@ -141,13 +141,13 @@ function ff_sbr_qmf_deint_neg_neon, export=1 add x2, x0, #60*4 mov x3, #-32 mov x4, #32 - movi v2.4S, #1<<7, lsl #24 -1: ld2 {v0.4S, v1.4S}, [x1], x3 - eor v0.16B, v0.16B, v2.16B - rev64 v1.4S, v1.4S - ext v1.16B, v1.16B, v1.16B, #8 - st1 {v0.4S}, [x2] - st1 {v1.4S}, [x0], #16 + movi v2.4s, #1<<7, lsl #24 +1: ld2 {v0.4s, v1.4s}, [x1], x3 + eor v0.16b, v0.16b, v2.16b + rev64 v1.4s, v1.4s + ext v1.16b, v1.16b, v1.16b, #8 + st1 {v0.4s}, [x2] + st1 {v1.4s}, [x0], #16 sub x2, x2, #16 subs x4, x4, #4 b.gt 1b @@ -159,16 +159,16 @@ function ff_sbr_qmf_deint_bfly_neon, export=1 add x3, x0, #124*4 mov x4, #64 mov x5, #-16 -1: ld1 {v0.4S}, [x1], #16 - ld1 {v1.4S}, [x2], x5 - rev64 v2.4S, v0.4S - ext v2.16B, v2.16B, v2.16B, #8 - rev64 v3.4S, v1.4S - ext v3.16B, v3.16B, v3.16B, #8 - fadd v1.4S, v1.4S, v2.4S - fsub v0.4S, v0.4S, v3.4S - st1 {v0.4S}, [x0], #16 - st1 {v1.4S}, [x3], x5 +1: ld1 {v0.4s}, [x1], #16 + ld1 {v1.4s}, [x2], x5 + rev64 v2.4s, v0.4s + ext v2.16b, v2.16b, v2.16b, #8 + rev64 v3.4s, v1.4s + ext v3.16b, v3.16b, v3.16b, #8 + fadd v1.4s, v1.4s, v2.4s + fsub v0.4s, v0.4s, v3.4s + st1 {v0.4s}, [x0], #16 + st1 {v1.4s}, [x3], x5 subs x4, x4, #4 b.gt 1b ret @@ -178,32 +178,32 @@ function ff_sbr_hf_gen_neon, export=1 sxtw x4, w4 sxtw x5, w5 movrel x6, factors - ld1 {v7.4S}, [x6] - dup v1.4S, v0.S[0] - mov v2.8B, v1.8B - mov v2.S[2], v7.S[0] - mov v2.S[3], v7.S[0] - fmul v1.4S, v1.4S, v2.4S - ld1 {v0.D}[0], [x3] - ld1 {v0.D}[1], [x2] - fmul v0.4S, v0.4S, v1.4S - fmul v1.4S, v0.4S, v7.4S - rev64 v0.4S, v0.4S + ld1 {v7.4s}, [x6] + dup v1.4s, v0.s[0] + mov v2.8b, v1.8b + mov v2.s[2], v7.s[0] + mov v2.s[3], v7.s[0] + fmul v1.4s, v1.4s, v2.4s + ld1 {v0.d}[0], [x3] + ld1 {v0.d}[1], [x2] + fmul v0.4s, v0.4s, v1.4s + fmul v1.4s, v0.4s, v7.4s + rev64 v0.4s, v0.4s sub x7, x5, x4 add x0, x0, x4, lsl #3 add x1, x1, x4, lsl #3 sub x1, x1, #16 -1: ld1 {v2.4S}, [x1], #16 - ld1 {v3.2S}, [x1] - fmul v4.4S, v2.4S, v1.4S - fmul v5.4S, v2.4S, v0.4S - faddp v4.4S, v4.4S, v4.4S - faddp v5.4S, v5.4S, v5.4S - faddp v4.4S, v4.4S, v4.4S - faddp v5.4S, v5.4S, v5.4S - mov v4.S[1], v5.S[0] - fadd v4.2S, v4.2S, v3.2S - st1 {v4.2S}, [x0], #8 +1: ld1 {v2.4s}, [x1], #16 + ld1 {v3.2s}, [x1] + fmul v4.4s, v2.4s, v1.4s + fmul v5.4s, v2.4s, v0.4s + faddp v4.4s, v4.4s, v4.4s + faddp v5.4s, v5.4s, v5.4s + faddp v4.4s, v4.4s, v4.4s + faddp v5.4s, v5.4s, v5.4s + mov v4.s[1], v5.s[0] + fadd v4.2s, v4.2s, v3.2s + st1 {v4.2s}, [x0], #8 sub x1, x1, #8 subs x7, x7, #1 b.gt 1b @@ -215,10 +215,10 @@ function ff_sbr_hf_g_filt_neon, export=1 sxtw x4, w4 mov x5, #40*2*4 add x1, x1, x4, lsl #3 -1: ld1 {v0.2S}, [x1], x5 - ld1 {v1.S}[0], [x2], #4 - fmul v2.4S, v0.4S, v1.S[0] - st1 {v2.2S}, [x0], #8 +1: ld1 {v0.2s}, [x1], x5 + ld1 {v1.s}[0], [x2], #4 + fmul v2.4s, v0.4s, v1.s[0] + st1 {v2.2s}, [x0], #8 subs x3, x3, #1 b.gt 1b ret @@ -227,46 +227,46 @@ endfunc function ff_sbr_autocorrelate_neon, export=1 mov x2, #38 movrel x3, factors - ld1 {v0.4S}, [x3] - movi v1.4S, #0 - movi v2.4S, #0 - movi v3.4S, #0 - ld1 {v4.2S}, [x0], #8 - ld1 {v5.2S}, [x0], #8 - fmul v16.2S, v4.2S, v4.2S - fmul v17.2S, v5.2S, v4.S[0] - fmul v18.2S, v5.2S, v4.S[1] -1: ld1 {v5.D}[1], [x0], #8 - fmla v1.2S, v4.2S, v4.2S - fmla v2.4S, v5.4S, v4.S[0] - fmla v3.4S, v5.4S, v4.S[1] - mov v4.D[0], v5.D[0] - mov v5.D[0], v5.D[1] + ld1 {v0.4s}, [x3] + movi v1.4s, #0 + movi v2.4s, #0 + movi v3.4s, #0 + ld1 {v4.2s}, [x0], #8 + ld1 {v5.2s}, [x0], #8 + fmul v16.2s, v4.2s, v4.2s + fmul v17.2s, v5.2s, v4.s[0] + fmul v18.2s, v5.2s, v4.s[1] +1: ld1 {v5.d}[1], [x0], #8 + fmla v1.2s, v4.2s, v4.2s + fmla v2.4s, v5.4s, v4.s[0] + fmla v3.4s, v5.4s, v4.s[1] + mov v4.d[0], v5.d[0] + mov v5.d[0], v5.d[1] subs x2, x2, #1 b.gt 1b - fmul v19.2S, v4.2S, v4.2S - fmul v20.2S, v5.2S, v4.S[0] - fmul v21.2S, v5.2S, v4.S[1] - fadd v22.4S, v2.4S, v20.4S - fsub v22.4S, v22.4S, v17.4S - fadd v23.4S, v3.4S, v21.4S - fsub v23.4S, v23.4S, v18.4S - rev64 v23.4S, v23.4S - fmul v23.4S, v23.4S, v0.4S - fadd v22.4S, v22.4S, v23.4S - st1 {v22.4S}, [x1], #16 - fadd v23.2S, v1.2S, v19.2S - fsub v23.2S, v23.2S, v16.2S - faddp v23.2S, v23.2S, v23.2S - st1 {v23.S}[0], [x1] + fmul v19.2s, v4.2s, v4.2s + fmul v20.2s, v5.2s, v4.s[0] + fmul v21.2s, v5.2s, v4.s[1] + fadd v22.4s, v2.4s, v20.4s + fsub v22.4s, v22.4s, v17.4s + fadd v23.4s, v3.4s, v21.4s + fsub v23.4s, v23.4s, v18.4s + rev64 v23.4s, v23.4s + fmul v23.4s, v23.4s, v0.4s + fadd v22.4s, v22.4s, v23.4s + st1 {v22.4s}, [x1], #16 + fadd v23.2s, v1.2s, v19.2s + fsub v23.2s, v23.2s, v16.2s + faddp v23.2s, v23.2s, v23.2s + st1 {v23.s}[0], [x1] add x1, x1, #8 - rev64 v3.2S, v3.2S - fmul v3.2S, v3.2S, v0.2S - fadd v2.2S, v2.2S, v3.2S - st1 {v2.2S}, [x1] + rev64 v3.2s, v3.2s + fmul v3.2s, v3.2s, v0.2s + fadd v2.2s, v2.2s, v3.2s + st1 {v2.2s}, [x1] add x1, x1, #16 - faddp v1.2S, v1.2S, v1.2S - st1 {v1.S}[0], [x1] + faddp v1.2s, v1.2s, v1.2s + st1 {v1.s}[0], [x1] ret endfunc @@ -278,25 +278,25 @@ endfunc 1: and x3, x3, #0x1ff add x8, x7, x3, lsl #3 add x3, x3, #2 - ld1 {v2.4S}, [x0] - ld1 {v3.2S}, [x1], #8 - ld1 {v4.2S}, [x2], #8 - ld1 {v5.4S}, [x8] - mov v6.16B, v2.16B - zip1 v3.4S, v3.4S, v3.4S - zip1 v4.4S, v4.4S, v4.4S - fmla v6.4S, v1.4S, v3.4S - fmla v2.4S, v5.4S, v4.4S - fcmeq v7.4S, v3.4S, #0 - bif v2.16B, v6.16B, v7.16B - st1 {v2.4S}, [x0], #16 + ld1 {v2.4s}, [x0] + ld1 {v3.2s}, [x1], #8 + ld1 {v4.2s}, [x2], #8 + ld1 {v5.4s}, [x8] + mov v6.16b, v2.16b + zip1 v3.4s, v3.4s, v3.4s + zip1 v4.4s, v4.4s, v4.4s + fmla v6.4s, v1.4s, v3.4s + fmla v2.4s, v5.4s, v4.4s + fcmeq v7.4s, v3.4s, #0 + bif v2.16b, v6.16b, v7.16b + st1 {v2.4s}, [x0], #16 subs x5, x5, #2 b.gt 1b .endm function ff_sbr_hf_apply_noise_0_neon, export=1 movrel x9, phi_noise_0 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc @@ -305,14 +305,14 @@ function ff_sbr_hf_apply_noise_1_neon, export=1 movrel x9, phi_noise_1 and x4, x4, #1 add x9, x9, x4, lsl #4 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc function ff_sbr_hf_apply_noise_2_neon, export=1 movrel x9, phi_noise_2 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc @@ -321,7 +321,7 @@ function ff_sbr_hf_apply_noise_3_neon, export=1 movrel x9, phi_noise_3 and x4, x4, #1 add x9, x9, x4, lsl #4 - ld1 {v1.4S}, [x9] + ld1 {v1.4s}, [x9] apply_noise_common ret endfunc |