aboutsummaryrefslogtreecommitdiffstats
path: root/libavfilter/x86
diff options
context:
space:
mode:
author Paul B Mahol <onemda@gmail.com> 2017-12-01 20:56:45 +0100
committer Paul B Mahol <onemda@gmail.com> 2017-12-04 09:58:25 +0100
commit 86fda8be3f3892c48474a319e0ef7509dc137e3e (patch)
tree 0cb45a7a8dead1053f2d91d7c5f1f00cce669d86 /libavfilter/x86
parent d1d6f965d81bab2076d67cdc1dabd5a1f5bb2d30 (diff)
download ffmpeg-86fda8be3f3892c48474a319e0ef7509dc137e3e.tar.gz
avfilter: add hflip x86 SIMD
Signed-off-by: Paul B Mahol <onemda@gmail.com>
Diffstat (limited to 'libavfilter/x86')
-rw-r--r-- libavfilter/x86/Makefile 2
-rw-r--r-- libavfilter/x86/vf_hflip.asm 108
-rw-r--r-- libavfilter/x86/vf_hflip_init.c 41
3 files changed, 151 insertions, 0 deletions
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index c10f4d5538..2fc5c62644 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -5,6 +5,7 @@ OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
+OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace_init.o
@@ -32,6 +33,7 @@ X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o
X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
diff --git a/libavfilter/x86/vf_hflip.asm b/libavfilter/x86/vf_hflip.asm
new file mode 100644
index 0000000000..867dcaeed5
--- /dev/null
+++ b/libavfilter/x86/vf_hflip.asm
@@ -0,0 +1,108 @@
+;*****************************************************************************
+;* x86-optimized functions for hflip filter
+;*
+;* Copyright (C) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+; pshufb control masks used by the functions below: output byte i of the
+; shuffle is taken from input byte mask[i].
+; pb_flip_byte reverses the order of all 16 bytes.
+pb_flip_byte: db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+; pb_flip_short reverses the order of the eight 16-bit words while keeping
+; the two bytes inside each word in their original order.
+pb_flip_short: db 14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1
+
+SECTION .text
+
+INIT_XMM ssse3
+;------------------------------------------------------------------------------
+; void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w)
+;
+; Copies w bytes, reading backwards from src and writing forwards to dst:
+; dst[i] = src[-i] for 0 <= i < w.
+;
+; Only five GPRs are requested.  The scalar temporary reuses r, which is free
+; once the tail length has been folded back into w.  r is deliberately the
+; fourth register: on x86_32 x86inc maps the fifth register to esi, which has
+; no 8-bit sub-register, so a byte-sized temporary there does not assemble.
+;
+; NOTE(review): the scalar loop runs at least once when entered, so w == 0
+; would still copy one byte; callers presumably always pass w >= 1 -- confirm.
+;------------------------------------------------------------------------------
+cglobal hflip_byte, 3, 5, 3, src, dst, w, r, x
+    mova    m0, [pb_flip_byte]
+    xor     xd, xd                  ; x = 0, running byte offset
+    mov     wd, dword wm            ; 32-bit load of w; zero-extends into wq on x86_64
+    mov     rq, wq
+    and     rq, 2 * mmsize - 1      ; r = tail bytes left for the scalar loop
+    cmp     wq, 2 * mmsize
+    jl .loop1                       ; row shorter than one vector iteration
+    sub     wq, rq                  ; w = bytes handled by the vector loop
+
+    ; Vector loop: two registers per iteration.  The loads end at src - x, so
+    ; x is negated around them to address the source backwards.
+    .loop0:
+        neg     xq
+        movu    m1, [srcq + xq -     mmsize + 1]
+        movu    m2, [srcq + xq - 2 * mmsize + 1]
+        pshufb  m1, m0
+        pshufb  m2, m0
+        neg     xq
+        movu    [dstq + xq         ], m1
+        movu    [dstq + xq + mmsize], m2
+        add     xq, mmsize * 2
+        cmp     xq, wq
+        jl .loop0
+
+    test    rq, rq
+    jz .end                         ; no tail bytes
+    add     wq, rq                  ; restore the full width for the tail loop
+
+    ; Scalar loop: one byte per iteration; r is no longer needed as a count.
+    .loop1:
+        neg     xq
+        mov     rb, [srcq + xq]
+        neg     xq
+        mov     [dstq + xq], rb
+        add     xq, 1
+        cmp     xq, wq
+        jl .loop1
+    .end:
+        RET
+
+; void ff_hflip_short_ssse3(const uint8_t *src, uint8_t *dst, int w)
+;
+; Copies w 16-bit elements, reading backwards from src and writing forwards to
+; dst: the element at byte offset 2*i in dst is the 16-bit word at byte offset
+; -2*i from src.
+;
+; NOTE(review): the scalar loop runs at least once when entered, so w == 0
+; would still copy one element; callers presumably always pass w >= 1 -- confirm.
+cglobal hflip_short, 3, 6, 3, src, dst, w, x, v, r
+ mova m0, [pb_flip_short]
+ mov xq, 0
+; 32-bit load of w, then doubled: from here on w and x are byte counts.
+ mov wd, dword wm
+ add wq, wq
+; r = tail bytes (byte count modulo 2 * mmsize) left for the scalar loop.
+ mov rq, wq
+ and rq, 2 * mmsize - 1
+; Rows shorter than one vector iteration are handled entirely by the scalar loop.
+ cmp wq, 2 * mmsize
+ jl .loop1
+ sub wq, rq
+
+; Vector loop: two registers per iteration. Each load ends at src - x + 1 (the
+; last byte of the current source word), so x is negated around the loads to
+; address the source backwards; pshufb then reverses the word order.
+ .loop0:
+ neg xq
+ movu m1, [srcq + xq - mmsize + 2]
+ movu m2, [srcq + xq - 2 * mmsize + 2]
+ pshufb m1, m0
+ pshufb m2, m0
+ neg xq
+ movu [dstq + xq ], m1
+ movu [dstq + xq + mmsize], m2
+ add xq, mmsize * 2
+ cmp xq, wq
+ jl .loop0
+
+; Skip the scalar loop when there is no tail, otherwise restore the full byte count.
+ cmp rq, 0
+ je .end
+ add wq, rq
+
+; Scalar loop: one 16-bit word per iteration.
+ .loop1:
+ neg xq
+ mov vw, [srcq + xq]
+ neg xq
+ mov [dstq + xq], vw
+ add xq, 2
+ cmp xq, wq
+ jl .loop1
+ .end:
+RET
diff --git a/libavfilter/x86/vf_hflip_init.c b/libavfilter/x86/vf_hflip_init.c
new file mode 100644
index 0000000000..2b5c9d3bf3
--- /dev/null
+++ b/libavfilter/x86/vf_hflip_init.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/hflip.h"
+
+void ff_hflip_byte_ssse3(const uint8_t *src, uint8_t *dst, int w);
+void ff_hflip_short_ssse3(const uint8_t *src, uint8_t *dst, int w);
+
+/**
+ * Install the x86 SIMD line-flipping routines into a FlipContext.
+ *
+ * For each of the first nb_planes planes, s->flip_line[plane] is replaced by
+ * the SSSE3 byte routine when step[plane] is 1 and by the SSSE3 short routine
+ * when step[plane] is 2.  Planes with any other step value, and every plane
+ * when SSSE3 is not available, are left untouched.
+ *
+ * @param s         context whose flip_line entries may be overridden
+ * @param step      per-plane step value (presumably bytes per sample --
+ *                  TODO confirm against the caller)
+ * @param nb_planes number of leading entries of step / flip_line to inspect
+ */
+av_cold void ff_hflip_init_x86(FlipContext *s, int step[4], int nb_planes)
+{
+    const int cpu_flags = av_get_cpu_flags();
+    int plane;
+
+    /* Both routines require SSSE3; without it nothing is overridden. */
+    if (!EXTERNAL_SSSE3(cpu_flags))
+        return;
+
+    for (plane = 0; plane < nb_planes; plane++) {
+        switch (step[plane]) {
+        case 1:
+            s->flip_line[plane] = ff_hflip_byte_ssse3;
+            break;
+        case 2:
+            s->flip_line[plane] = ff_hflip_short_ssse3;
+            break;
+        default:
+            /* No SIMD routine for this step; keep the existing function. */
+            break;
+        }
+    }
+}