diff options
author | Clément Bœsch <u@pkh.me> | 2014-08-02 18:54:43 +0200 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-08-05 21:05:52 +0200 |
commit | 28a2107a8d61af7c7a26f9d4af0716ba12c112a7 (patch) | |
tree | 2dd49b789e0296fe6238adb9cc6be46c304403ce /libavutil | |
parent | 6931d1274568ecb1c957157ed92066bde52073d8 (diff) | |
download | ffmpeg-28a2107a8d61af7c7a26f9d4af0716ba12c112a7.tar.gz |
avutil: add pixelutils API
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/Makefile | 3 | ||||
-rw-r--r-- | libavutil/pixelutils.c | 153 | ||||
-rw-r--r-- | libavutil/pixelutils.h | 52 | ||||
-rw-r--r-- | libavutil/version.h | 2 | ||||
-rw-r--r-- | libavutil/x86/Makefile | 4 | ||||
-rw-r--r-- | libavutil/x86/pixelutils.asm | 155 | ||||
-rw-r--r-- | libavutil/x86/pixelutils.h | 26 | ||||
-rw-r--r-- | libavutil/x86/pixelutils_init.c | 58 |
8 files changed, 452 insertions, 1 deletions
diff --git a/libavutil/Makefile b/libavutil/Makefile index 91751dc4c3..d57a741a0a 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -44,6 +44,7 @@ HEADERS = adler32.h \ opt.h \ parseutils.h \ pixdesc.h \ + pixelutils.h \ pixfmt.h \ random_seed.h \ replaygain.h \ @@ -113,6 +114,7 @@ OBJS = adler32.o \ opt.o \ parseutils.o \ pixdesc.o \ + pixelutils.o \ random_seed.o \ rational.o \ rc4.o \ @@ -170,6 +172,7 @@ TESTPROGS = adler32 \ pca \ parseutils \ pixdesc \ + pixelutils \ random_seed \ rational \ ripemd \ diff --git a/libavutil/pixelutils.c b/libavutil/pixelutils.c new file mode 100644 index 0000000000..cfdd35afc2 --- /dev/null +++ b/libavutil/pixelutils.c @@ -0,0 +1,153 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "common.h" +#include "pixelutils.h" + +#if CONFIG_PIXELUTILS + +#include "x86/pixelutils.h" + +static av_always_inline int sad_wxh(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2, + int w, int h) +{ + int x, y, sum = 0; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) + sum += abs(src1[x] - src2[x]); + src1 += stride1; + src2 += stride2; + } + return sum; +} + +#define DECLARE_BLOCK_FUNCTIONS(size) \ +static int block_sad_##size##x##size##_c(const uint8_t *src1, ptrdiff_t stride1, \ + const uint8_t *src2, ptrdiff_t stride2) \ +{ \ + return sad_wxh(src1, stride1, src2, stride2, size, size); \ +} + +DECLARE_BLOCK_FUNCTIONS(2) +DECLARE_BLOCK_FUNCTIONS(4) +DECLARE_BLOCK_FUNCTIONS(8) +DECLARE_BLOCK_FUNCTIONS(16) + +static const av_pixelutils_sad_fn sad_c[] = { + block_sad_2x2_c, + block_sad_4x4_c, + block_sad_8x8_c, + block_sad_16x16_c, +}; + +#endif /* CONFIG_PIXELUTILS */ + +av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligned, void *log_ctx) +{ +#if !CONFIG_PIXELUTILS + av_log(log_ctx, AV_LOG_ERROR, "pixelutils support is required " + "but libavutil is not compiled with it\n"); + return NULL; +#else + av_pixelutils_sad_fn sad[FF_ARRAY_ELEMS(sad_c)]; + + memcpy(sad, sad_c, sizeof(sad)); + + if (w_bits < 1 || w_bits > FF_ARRAY_ELEMS(sad) || + h_bits < 1 || h_bits > FF_ARRAY_ELEMS(sad)) + return NULL; + if (w_bits != h_bits) // only squared sad for now + return NULL; + +#if ARCH_X86 + ff_pixelutils_sad_init_x86(sad, aligned); +#endif + + return sad[w_bits - 1]; +#endif +} + +#ifdef TEST +#define W1 320 +#define H1 240 +#define W2 640 +#define H2 480 + +static int run_test(const char *test, + const uint32_t *b1, const uint32_t *b2) +{ + int i, a, ret 
= 0; + + for (a = 0; a < 3; a++) { + const uint8_t *block1 = (const uint8_t *)b1; + const uint8_t *block2 = (const uint8_t *)b2; + + switch (a) { + case 0: block1++; block2++; break; + case 1: block2++; break; + case 2: break; + } + for (i = 1; i <= FF_ARRAY_ELEMS(sad_c); i++) { + av_pixelutils_sad_fn f_ref = sad_c[i - 1]; + av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(i, i, a, NULL); + const int out = f_out(block1, W1, block2, W2); + const int ref = f_ref(block1, W1, block2, W2); + printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n", + out == ref ? "OK" : "FAIL", + a ? 'A' : 'U', a == 2 ? 'A' : 'U', + test, 1<<i, 1<<i, out, ref); + if (out != ref) + ret = 1; + } + } + return ret; +} + +int main(void) +{ + int i, ret; + DECLARE_ALIGNED(32, uint32_t, buf1)[W1*H1]; + DECLARE_ALIGNED(32, uint32_t, buf2)[W2*H2]; + uint32_t state = 0; + + for (i = 0; i < W1*H1; i++) { + state = state * 1664525 + 1013904223; + buf1[i] = state; + } + for (i = 0; i < W2*H2; i++) { + state = state * 1664525 + 1013904223; + buf2[i] = state; + } + ret = run_test("random", buf1, buf2); + if (ret) /* run_test() returns 1 on a mismatch, never a negative value */ + return ret; + + memset(buf1, 0xff, sizeof(buf1)); + memset(buf2, 0x00, sizeof(buf2)); + ret = run_test("max", buf1, buf2); + if (ret) + return ret; + + memset(buf1, 0x90, sizeof(buf1)); + memset(buf2, 0x90, sizeof(buf2)); + return run_test("min", buf1, buf2); +} +#endif /* TEST */ diff --git a/libavutil/pixelutils.h b/libavutil/pixelutils.h new file mode 100644 index 0000000000..a8dbc157e1 --- /dev/null +++ b/libavutil/pixelutils.h @@ -0,0 +1,52 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_PIXELUTILS_H +#define AVUTIL_PIXELUTILS_H + +#include <stddef.h> +#include <stdint.h> +#include "common.h" + +/** + * Sum of abs(src1[x] - src2[x]) + */ +typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +/** + * Get a potentially optimized pointer to a Sum-of-absolute-differences + * function (see the av_pixelutils_sad_fn prototype). + * + * @param w_bits 1<<w_bits is the requested width of the block size + * @param h_bits 1<<h_bits is the requested height of the block size + * @param aligned If set to 2, the returned sad function will assume src1 and + * src2 addresses are aligned on the block size. + * If set to 1, the returned sad function will assume src1 is + * aligned on the block size. + * If set to 0, the returned sad function assumes no particular + * alignment. 
+ * @param log_ctx context used for logging, can be NULL + * + * @return a pointer to the SAD function or NULL in case of error (because of + * invalid parameters) + */ +av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, + int aligned, void *log_ctx); + +#endif /* AVUTIL_PIXELUTILS_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 6f47baa436..85b02d6df3 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -56,7 +56,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 52 -#define LIBAVUTIL_VERSION_MINOR 97 +#define LIBAVUTIL_VERSION_MINOR 98 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 1e19082233..ad3bdfc29d 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -2,7 +2,11 @@ OBJS += x86/cpu.o \ x86/float_dsp_init.o \ x86/lls_init.o \ +OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ + YASM-OBJS += x86/cpuid.o \ x86/emms.o \ x86/float_dsp.o \ x86/lls.o \ + +YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm new file mode 100644 index 0000000000..8ab0a18355 --- /dev/null +++ b/libavutil/x86/pixelutils.asm @@ -0,0 +1,155 @@ +;****************************************************************************** +;* Pixel utilities SIMD +;* +;* Copyright (C) 2002-2004 Michael Niedermayer <michaelni@gmx.at> +;* Copyright (C) 2014 Clément Bœsch <u pkh me> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. 
+;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86util.asm" + +SECTION_TEXT + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmx +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m7, m7 + pxor m6, m6 +%rep 4 + mova m0, [src1q] + mova m2, [src1q + stride1q] + mova m1, [src2q] + mova m3, [src2q + stride2q] + psubusb m4, m0, m1 + psubusb m5, m2, m3 + psubusb m1, m0 + psubusb m3, m2 + por m1, m4 + por m3, m5 + punpcklbw m0, m1, m7 + punpcklbw m2, m3, m7 + punpckhbw m1, m7 + punpckhbw m3, m7 + paddw m0, m1 + paddw m2, m3 + paddw m0, m2 + paddw m6, m0 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + psrlq m0, m6, 32 + paddw m6, m0 + psrlq m0, m6, 16 + paddw m6, m0 + movd eax, m6 + movzx eax, ax + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 4 + mova m0, [src1q] + mova m1, [src1q + stride1q] + psadbw m0, [src2q] + psadbw m1, 
[src2q + stride2q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 16 + mova m0, [src1q] + mova m1, [src1q + 8] + psadbw m0, [src2q] + psadbw m1, [src2q + 8] + paddw m2, m0 + paddw m2, m1 + add src1q, stride1q + add src2q, stride2q +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 + pxor m4, m4 +%rep 8 + movu m0, [src1q] + movu m1, [src1q + stride1q] + movu m2, [src2q] + movu m3, [src2q + stride2q] + psadbw m0, m2 + psadbw m1, m3 + paddw m4, m0 + paddw m4, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m4 + paddw m4, m0 + movd eax, m4 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_[au]_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +%macro SAD_XMM_16x16 1 +INIT_XMM sse2 +cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 8 + mov%1 m0, [src2q] + mov%1 m1, [src2q + stride2q] + psadbw m0, [src1q] + psadbw m1, [src1q + stride1q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 
2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m2 + paddw m2, m0 + movd eax, m2 + RET +%endmacro + +SAD_XMM_16x16 a +SAD_XMM_16x16 u diff --git a/libavutil/x86/pixelutils.h b/libavutil/x86/pixelutils.h new file mode 100644 index 0000000000..876cf46053 --- /dev/null +++ b/libavutil/x86/pixelutils.h @@ -0,0 +1,26 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_X86_PIXELUTILS_H +#define AVUTIL_X86_PIXELUTILS_H + +#include "libavutil/pixelutils.h" + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned); + +#endif /* AVUTIL_X86_PIXELUTILS_H */ diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c new file mode 100644 index 0000000000..d60051067a --- /dev/null +++ b/libavutil/x86/pixelutils_init.c @@ -0,0 +1,58 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "pixelutils.h" +#include "cpu.h" + +int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmx; + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmxext; + sad[3] = ff_pixelutils_sad_16x16_mmxext; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (aligned) { + case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned + case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned + case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned + } + } +} |