aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/aarch64
diff options
context:
space:
mode:
authorJanne Grunau <j@jannau.net>2014-04-05 11:47:18 +0200
committerJanne Grunau <janne-libav@jannau.net>2014-04-06 21:18:49 +0200
commitd3789eeeed3423bd1ca9dc40030a2f7a21ea5332 (patch)
treee611418f1b7b3e83aa908fc03ed77485df17843e /libavcodec/aarch64
parent8675bcb0addb1c7fb0b04682d1f3f95d5b8dae14 (diff)
downloadffmpeg-d3789eeeed3423bd1ca9dc40030a2f7a21ea5332.tar.gz
aarch64: implement videodsp.prefetch
8% faster h264 decoding on Apple A7.
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r--libavcodec/aarch64/Makefile3
-rw-r--r--libavcodec/aarch64/videodsp.S28
-rw-r--r--libavcodec/aarch64/videodsp_init.c32
3 files changed, 63 insertions, 0 deletions
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 59d1762269..757b499db0 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -3,10 +3,13 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
+OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o
+ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o
+
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
aarch64/h264idct_neon.o
diff --git a/libavcodec/aarch64/videodsp.S b/libavcodec/aarch64/videodsp.S
new file mode 100644
index 0000000000..7ce5a7ddf6
--- /dev/null
+++ b/libavcodec/aarch64/videodsp.S
@@ -0,0 +1,28 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+function ff_prefetch_aarch64, export=1
+ subs w2, w2, #2
+ prfm pldl1strm, [x0]
+ prfm pldl1strm, [x0, x1]
+ add x0, x0, x1, lsl #1
+ b.gt X(ff_prefetch_aarch64)
+ ret
+endfunc
diff --git a/libavcodec/aarch64/videodsp_init.c b/libavcodec/aarch64/videodsp_init.c
new file mode 100644
index 0000000000..59b697d4f4
--- /dev/null
+++ b/libavcodec/aarch64/videodsp_init.c
@@ -0,0 +1,32 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/videodsp.h"
+
+void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h);
+
+av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_armv8(cpu_flags))
+ ctx->prefetch = ff_prefetch_aarch64;
+}