diff options
author | Martin Storsjö <martin@martin.st> | 2021-05-17 12:48:03 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2021-05-22 22:33:20 +0300 |
commit | c813f5e3436b5ba40b105cdaaaa7b1184baabde7 (patch) | |
tree | 2249ec01469d36c991001bf625912b600ec9c77b | |
parent | f7468a9c4037fd406847af4348c0deb2d521d0fc (diff) | |
download | ffmpeg-c813f5e3436b5ba40b105cdaaaa7b1184baabde7.tar.gz |
aarch64: hevc_idct: Fix overflows in idct_dc
This is marginally slower, but correct for all input values.
The previous implementation failed with certain input seeds, e.g.
"checkasm --test=hevc_idct 98".
Signed-off-by: Martin Storsjö <martin@martin.st>
(cherry picked from commit f27e3ccf06ee19935d160164ca4a02f28cfc2a27)
-rw-r--r-- | libavcodec/aarch64/hevcdsp_idct_neon.S | 11 |
1 files changed, 5 insertions, 6 deletions
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 28c11e632c..0869431294 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -573,14 +573,13 @@ idct_16x16 10
 // void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
 .macro idct_dc size, bitdepth
 function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
-        movi v1.8h, #((1 << (14 - \bitdepth))+1)
         ld1r {v4.8h}, [x0]
-        add v4.8h, v4.8h, v1.8h
-        sshr v0.8h, v4.8h, #(15 - \bitdepth)
-        sshr v1.8h, v4.8h, #(15 - \bitdepth)
+        srshr v4.8h, v4.8h, #1
+        srshr v0.8h, v4.8h, #(14 - \bitdepth)
+        srshr v1.8h, v4.8h, #(14 - \bitdepth)
 .if \size > 4
-        sshr v2.8h, v4.8h, #(15 - \bitdepth)
-        sshr v3.8h, v4.8h, #(15 - \bitdepth)
+        srshr v2.8h, v4.8h, #(14 - \bitdepth)
+        srshr v3.8h, v4.8h, #(14 - \bitdepth)
 .if \size > 16 /* dc 32x32 */
         mov x2, #4
 1: