about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Rémi Denis-Courmont <remi@remlab.net> 2024-07-25 18:38:32 +0300
committer Rémi Denis-Courmont <remi@remlab.net> 2024-08-01 18:44:01 +0300
commit d527d238728bf9b75d55960c4279134d0fba493b (patch)
tree 01d99a18acdabd491af4aac2a8c46fad372d903b
parent 656a9664bf828fc19b1e5a6e982bbcad456d1092 (diff)
download ffmpeg-d527d238728bf9b75d55960c4279134d0fba493b.tar.gz
lavc/pixblockdsp: specialise aligned 16-bit get_pixels
The current code assumes that rows are unaligned, which hurts on platforms with slower unaligned accesses. (Also, the aligned version uses a loop instead of manually unrolled copies, leaving the unrolling to the compiler, which it seems to do in practice.)
-rw-r--r-- libavcodec/pixblockdsp.c 9
1 file changed, 8 insertions, 1 deletion
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index bbbeca1618..1fff244511 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -27,6 +27,13 @@
static void get_pixels_16_c(int16_t *restrict block, const uint8_t *pixels,
ptrdiff_t stride)
{
+ for (int i = 0; i < 8; i++)
+ AV_COPY128(block + i * 8, pixels + i * stride);
+}
+
+static void get_pixels_unaligned_16_c(int16_t *restrict block,
+ const uint8_t *pixels, ptrdiff_t stride)
+{
AV_COPY128U(block + 0 * 8, pixels + 0 * stride);
AV_COPY128U(block + 1 * 8, pixels + 1 * stride);
AV_COPY128U(block + 2 * 8, pixels + 2 * stride);
@@ -90,7 +97,7 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
case 10:
case 12:
case 14:
- c->get_pixels_unaligned =
+ c->get_pixels_unaligned = get_pixels_unaligned_16_c;
c->get_pixels = get_pixels_16_c;
break;
default: