aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2023-11-11 17:08:45 +0200
committer: Rémi Denis-Courmont <remi@remlab.net> 2023-11-14 19:15:51 +0200
commit: ce467421dc9e2061b8af22973ba4ba6248f16de9 (patch)
tree: 7023f8389c8a5ba6ba7fda0b8d40c4f4664cfa51 /libavcodec
parent: c536e9220702dec7fbccd6a03f043cc142d68c79 (diff)
download: ffmpeg-ce467421dc9e2061b8af22973ba4ba6248f16de9.tar.gz
lavc/exrdsp: unroll predictor
With explicit unrolling, we can skip half of the sign bit flips, and the compiler is then better able to optimise the scalar loop:

predictor_c: 31376.0 (before)
predictor_c: 23703.0 (after)
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/exrdsp.c16
1 files changed, 13 insertions, 3 deletions
diff --git a/libavcodec/exrdsp.c b/libavcodec/exrdsp.c
index 752e1eb553..248cb93c5a 100644
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -40,10 +40,20 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
/*
 * Apply the byte-wise prediction recurrence to a buffer, in place.
 *
 * Behaves exactly like the straightforward reference loop
 *
 *     for (i = 1; i < size; i++)
 *         src[i] += src[i - 1] - 128;
 *
 * with all arithmetic performed modulo 256 (uint8_t). src[0] is never
 * modified; every later byte becomes the running sum of itself and its
 * already-updated predecessor, biased by -128.
 *
 * src:  buffer of at least `size` bytes, updated in place.
 * size: number of bytes to process. Values below 2 leave the buffer
 *       untouched, as in the reference loop.
 */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    /*
     * Nothing to do for fewer than two bytes. This guard is required:
     * size == 0 is even, so without it the peeling step below would read
     * src[0] and write src[1], both outside the buffer.
     */
    if (size < 2)
        return;

    /* Unrolled: `src[i + 1] += src[i] - 128;` */
    if ((size & 1) == 0) {
        /*
         * Peel one step so the main loop always handles whole pairs.
         * Modulo 256, flipping the top bit (x ^ 0x80) equals x - 128.
         */
        src[1] += src[0] ^ 0x80;
        src++;
        size--;
    }

    /* size is odd here, so i + 1 <= size - 1 always stays in bounds. */
    for (ptrdiff_t i = 1; i < size; i += 2) {
        /* Unbiased sum; the final src[i] is this value with bit 7 flipped. */
        uint8_t a = src[i] + src[i - 1];

        src[i] = a;
        /*
         * The next byte needs (final src[i]) - 128 == a, so the two bias
         * flips cancel and only one flip per pair is needed.
         */
        src[i + 1] += a;
        src[i] ^= 0x80;
    }
}
av_cold void ff_exrdsp_init(ExrDSPContext *c)