aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil
diff options
context:
space:
mode:
authorClément Bœsch <u@pkh.me>2014-08-02 18:54:43 +0200
committerClément Bœsch <u@pkh.me>2014-08-05 21:05:52 +0200
commit28a2107a8d61af7c7a26f9d4af0716ba12c112a7 (patch)
tree2dd49b789e0296fe6238adb9cc6be46c304403ce /libavutil
parent6931d1274568ecb1c957157ed92066bde52073d8 (diff)
downloadffmpeg-28a2107a8d61af7c7a26f9d4af0716ba12c112a7.tar.gz
avutil: add pixelutils API
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/Makefile3
-rw-r--r--libavutil/pixelutils.c153
-rw-r--r--libavutil/pixelutils.h52
-rw-r--r--libavutil/version.h2
-rw-r--r--libavutil/x86/Makefile4
-rw-r--r--libavutil/x86/pixelutils.asm155
-rw-r--r--libavutil/x86/pixelutils.h26
-rw-r--r--libavutil/x86/pixelutils_init.c58
8 files changed, 452 insertions, 1 deletions
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 91751dc4c3..d57a741a0a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -44,6 +44,7 @@ HEADERS = adler32.h \
opt.h \
parseutils.h \
pixdesc.h \
+ pixelutils.h \
pixfmt.h \
random_seed.h \
replaygain.h \
@@ -113,6 +114,7 @@ OBJS = adler32.o \
opt.o \
parseutils.o \
pixdesc.o \
+ pixelutils.o \
random_seed.o \
rational.o \
rc4.o \
@@ -170,6 +172,7 @@ TESTPROGS = adler32 \
pca \
parseutils \
pixdesc \
+ pixelutils \
random_seed \
rational \
ripemd \
diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c
new file mode 100644
index 0000000000..cfdd35afc2
--- /dev/null
+++ b/libavutil/pixelutils.c
@@ -0,0 +1,153 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "common.h"
+#include "pixelutils.h"
+
+#if CONFIG_PIXELUTILS
+
+#include "x86/pixelutils.h"
+
+static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2,
+ int w, int h)
+{
+ int x, y, sum = 0;
+
+ for (y = 0; y < h; y++) {
+ for (x = 0; x < w; x++)
+ sum += abs(src1[x] - src2[x]);
+ src1 += stride1;
+ src2 += stride2;
+ }
+ return sum;
+}
+
+#define DECLARE_BLOCK_FUNCTIONS(size) \
+static int block_sad_##size##x##size##_c(const uint8_t *src1, ptrdiff_t stride1, \
+ const uint8_t *src2, ptrdiff_t stride2) \
+{ \
+ return sad_wxh(src1, stride1, src2, stride2, size, size); \
+}
+
+DECLARE_BLOCK_FUNCTIONS(2)
+DECLARE_BLOCK_FUNCTIONS(4)
+DECLARE_BLOCK_FUNCTIONS(8)
+DECLARE_BLOCK_FUNCTIONS(16)
+
+static const av_pixelutils_sad_fn sad_c[] = {
+ block_sad_2x2_c,
+ block_sad_4x4_c,
+ block_sad_8x8_c,
+ block_sad_16x16_c,
+};
+
+#endif /* CONFIG_PIXELUTILS */
+
+av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx)
+{
+#if !CONFIG_PIXELUTILS
+ av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required "
+ "but libavutil is not compiled with it\n");
+ return NULL;
+#else
+ av_pixelutils_sad_fn sad[FF_ARRAY_ELEMS(sad_c)];
+
+ memcpy(sad, sad_c, sizeof(sad));
+
+ if (w_bits < 1 || w_bits > FF_ARRAY_ELEMS(sad) ||
+ h_bits < 1 || h_bits > FF_ARRAY_ELEMS(sad))
+ return NULL;
+ if (w_bits != h_bits) // only squared sad for now
+ return NULL;
+
+#if ARCH_X86
+ ff_pixelutils_sad_init_x86(sad, aligned);
+#endif
+
+ return sad[w_bits - 1];
+#endif
+}
+
+#ifdef TEST
+#define W1 320
+#define H1 240
+#define W2 640
+#define H2 480
+
+static int run_test(const char *test,
+ const uint32_t *b1, const uint32_t *b2)
+{
+ int i, a, ret = 0;
+
+ for (a = 0; a < 3; a++) {
+ const uint8_t *block1 = (const uint8_t *)b1;
+ const uint8_t *block2 = (const uint8_t *)b2;
+
+ switch (a) {
+ case 0: block1++; block2++; break;
+ case 1: block2++; break;
+ case 2: break;
+ }
+ for (i = 1; i <= FF_ARRAY_ELEMS(sad_c); i++) {
+ av_pixelutils_sad_fn f_ref = sad_c[i - 1];
+ av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(i, i, a, NULL);
+ const int out = f_out(block1, W1, block2, W2);
+ const int ref = f_ref(block1, W1, block2, W2);
+ printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n",
+ out == ref ? "OK" : "FAIL",
+ a ? 'A' : 'U', a == 2 ? 'A' : 'U',
+ test, 1<<i, 1<<i, out, ref);
+ if (out != ref)
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+int main(void)
+{
+ int i, ret;
+ DECLARE_ALIGNED(32, uint32_t, buf1)[W1*H1];
+ DECLARE_ALIGNED(32, uint32_t, buf2)[W2*H2];
+ uint32_t state = 0;
+
+ for (i = 0; i < W1*H1; i++) {
+ state = state * 1664525 + 1013904223;
+ buf1[i] = state;
+ }
+ for (i = 0; i < W2*H2; i++) {
+ state = state * 1664525 + 1013904223;
+ buf2[i] = state;
+ }
+ ret = run_test("random", buf1, buf2);
+ if (ret < 0)
+ return ret;
+
+ memset(buf1, 0xff, sizeof(buf1));
+ memset(buf2, 0x00, sizeof(buf2));
+ ret = run_test("max", buf1, buf2);
+ if (ret < 0)
+ return ret;
+
+ memset(buf1, 0x90, sizeof(buf1));
+ memset(buf2, 0x90, sizeof(buf2));
+ return run_test("min", buf1, buf2);
+}
+#endif /* TEST */
diff --git a/libavutil/pixelutils.h b/libavutil/pixelutils.h
new file mode 100644
index 0000000000..a8dbc157e1
--- /dev/null
+++ b/libavutil/pixelutils.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PIXELUTILS_H
+#define AVUTIL_PIXELUTILS_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "common.h"
+
+/**
+ * Sum of abs(src1[x] - src2[x])
+ */
+typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+
+/**
+ * Get a potentially optimized pointer to a Sum-of-absolute-differences
+ * function (see the av_pixelutils_sad_fn prototype).
+ *
+ * @param w_bits 1<<w_bits is the requested width of the block size
+ * @param h_bits 1<<h_bits is the requested height of the block size
+ * @param aligned If set to 2, the returned sad function will assume src1 and
+ * src2 addresses are aligned on the block size.
+ * If set to 1, the returned sad function will assume src1 is
+ * aligned on the block size.
+ * If set to 0, the returned sad function assume no particular
+ * alignment.
+ * @param log_ctx context used for logging, can be NULL
+ *
+ * @return a pointer to the SAD function or NULL in case of error (because of
+ * invalid parameters)
+ */
+av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits,
+ int aligned, void *log_ctx);
+
+#endif /* AVUTIL_PIXELUTILS_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 6f47baa436..85b02d6df3 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -56,7 +56,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 52
-#define LIBAVUTIL_VERSION_MINOR 97
+#define LIBAVUTIL_VERSION_MINOR 98
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile
index 1e19082233..ad3bdfc29d 100644
--- a/libavutil/x86/Makefile
+++ b/libavutil/x86/Makefile
@@ -2,7 +2,11 @@ OBJS += x86/cpu.o \
x86/float_dsp_init.o \
x86/lls_init.o \
+OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \
+
YASM-OBJS += x86/cpuid.o \
x86/emms.o \
x86/float_dsp.o \
x86/lls.o \
+
+YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \
diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm
new file mode 100644
index 0000000000..8ab0a18355
--- /dev/null
+++ b/libavutil/x86/pixelutils.asm
@@ -0,0 +1,155 @@
+;******************************************************************************
+;* Pixel utilities SIMD
+;*
+;* Copyright (C) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+;* Copyright (C) 2014 Clément Bœsch <u pkh me>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86util.asm"
+
+SECTION_TEXT
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
+; const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_MMX mmx
+cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
+ pxor m7, m7
+ pxor m6, m6
+%rep 4
+ mova m0, [src1q]
+ mova m2, [src1q + stride1q]
+ mova m1, [src2q]
+ mova m3, [src2q + stride2q]
+ psubusb m4, m0, m1
+ psubusb m5, m2, m3
+ psubusb m1, m0
+ psubusb m3, m2
+ por m1, m4
+ por m3, m5
+ punpcklbw m0, m1, m7
+ punpcklbw m2, m3, m7
+ punpckhbw m1, m7
+ punpckhbw m3, m7
+ paddw m0, m1
+ paddw m2, m3
+ paddw m0, m2
+ paddw m6, m0
+ lea src1q, [src1q + 2*stride1q]
+ lea src2q, [src2q + 2*stride2q]
+%endrep
+ psrlq m0, m6, 32
+ paddw m6, m0
+ psrlq m0, m6, 16
+ paddw m6, m0
+ movd eax, m6
+ movzx eax, ax
+ RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
+; const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_MMX mmxext
+cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
+ pxor m2, m2
+%rep 4
+ mova m0, [src1q]
+ mova m1, [src1q + stride1q]
+ psadbw m0, [src2q]
+ psadbw m1, [src2q + stride2q]
+ paddw m2, m0
+ paddw m2, m1
+ lea src1q, [src1q + 2*stride1q]
+ lea src2q, [src2q + 2*stride2q]
+%endrep
+ movd eax, m2
+ RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
+; const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_MMX mmxext
+cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
+ pxor m2, m2
+%rep 16
+ mova m0, [src1q]
+ mova m1, [src1q + 8]
+ psadbw m0, [src2q]
+ psadbw m1, [src2q + 8]
+ paddw m2, m0
+ paddw m2, m1
+ add src1q, stride1q
+ add src2q, stride2q
+%endrep
+ movd eax, m2
+ RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_16x16_sse(const uint8_t *src1, ptrdiff_t stride1,
+; const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
+ pxor m4, m4
+%rep 8
+ movu m0, [src1q]
+ movu m1, [src1q + stride1q]
+ movu m2, [src2q]
+ movu m3, [src2q + stride2q]
+ psadbw m0, m2
+ psadbw m1, m3
+ paddw m4, m0
+ paddw m4, m1
+ lea src1q, [src1q + 2*stride1q]
+ lea src2q, [src2q + 2*stride2q]
+%endrep
+ movhlps m0, m4
+ paddw m4, m0
+ movd eax, m4
+ RET
+
+;-------------------------------------------------------------------------------
+; int ff_pixelutils_sad_[au]_16x16_sse(const uint8_t *src1, ptrdiff_t stride1,
+; const uint8_t *src2, ptrdiff_t stride2);
+;-------------------------------------------------------------------------------
+%macro SAD_XMM_16x16 1
+INIT_XMM sse2
+cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2
+ pxor m2, m2
+%rep 8
+ mov%1 m0, [src2q]
+ mov%1 m1, [src2q + stride2q]
+ psadbw m0, [src1q]
+ psadbw m1, [src1q + stride1q]
+ paddw m2, m0
+ paddw m2, m1
+ lea src1q, [src1q + 2*stride1q]
+ lea src2q, [src2q + 2*stride2q]
+%endrep
+ movhlps m0, m2
+ paddw m2, m0
+ movd eax, m2
+ RET
+%endmacro
+
+SAD_XMM_16x16 a
+SAD_XMM_16x16 u
diff --git a/libavutil/x86/pixelutils.h b/libavutil/x86/pixelutils.h
new file mode 100644
index 0000000000..876cf46053
--- /dev/null
+++ b/libavutil/x86/pixelutils.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_X86_PIXELUTILS_H
+#define AVUTIL_X86_PIXELUTILS_H
+
+#include "libavutil/pixelutils.h"
+
+void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned);
+
+#endif /* AVUTIL_X86_PIXELUTILS_H */
diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c
new file mode 100644
index 0000000000..d60051067a
--- /dev/null
+++ b/libavutil/x86/pixelutils_init.c
@@ -0,0 +1,58 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "pixelutils.h"
+#include "cpu.h"
+
+int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+
+int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
+ const uint8_t *src2, ptrdiff_t stride2);
+
+void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+ sad[2] = ff_pixelutils_sad_8x8_mmx;
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ sad[2] = ff_pixelutils_sad_8x8_mmxext;
+ sad[3] = ff_pixelutils_sad_16x16_mmxext;
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ switch (aligned) {
+ case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned
+ case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned
+ case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned
+ }
+ }
+}