diff options
author | Clément Bœsch <u@pkh.me> | 2014-08-02 18:54:43 +0200 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-08-05 21:05:52 +0200 |
commit | 28a2107a8d61af7c7a26f9d4af0716ba12c112a7 (patch) | |
tree | 2dd49b789e0296fe6238adb9cc6be46c304403ce /libavutil/x86 | |
parent | 6931d1274568ecb1c957157ed92066bde52073d8 (diff) | |
download | ffmpeg-28a2107a8d61af7c7a26f9d4af0716ba12c112a7.tar.gz |
avutil: add pixelutils API
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- | libavutil/x86/Makefile | 4 | ||||
-rw-r--r-- | libavutil/x86/pixelutils.asm | 155 | ||||
-rw-r--r-- | libavutil/x86/pixelutils.h | 26 | ||||
-rw-r--r-- | libavutil/x86/pixelutils_init.c | 58 |
4 files changed, 243 insertions, 0 deletions
diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 1e19082233..ad3bdfc29d 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -2,7 +2,11 @@ OBJS += x86/cpu.o \ x86/float_dsp_init.o \ x86/lls_init.o \ +OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils_init.o \ + YASM-OBJS += x86/cpuid.o \ x86/emms.o \ x86/float_dsp.o \ x86/lls.o \ + +YASM-OBJS-$(CONFIG_PIXELUTILS) += x86/pixelutils.o \ diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm new file mode 100644 index 0000000000..8ab0a18355 --- /dev/null +++ b/libavutil/x86/pixelutils.asm @@ -0,0 +1,155 @@ +;****************************************************************************** +;* Pixel utilities SIMD +;* +;* Copyright (C) 2002-2004 Michael Niedermayer <michaelni@gmx.at> +;* Copyright (C) 2014 Clément Bœsch <u pkh me> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86util.asm" + +SECTION_TEXT + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmx +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m7, m7 + pxor m6, m6 +%rep 4 + mova m0, [src1q] + mova m2, [src1q + stride1q] + mova m1, [src2q] + mova m3, [src2q + stride2q] + psubusb m4, m0, m1 + psubusb m5, m2, m3 + psubusb m1, m0 + psubusb m3, m2 + por m1, m4 + por m3, m5 + punpcklbw m0, m1, m7 + punpcklbw m2, m3, m7 + punpckhbw m1, m7 + punpckhbw m3, m7 + paddw m0, m1 + paddw m2, m3 + paddw m0, m2 + paddw m6, m0 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + psrlq m0, m6, 32 + paddw m6, m0 + psrlq m0, m6, 16 + paddw m6, m0 + movd eax, m6 + movzx eax, ax + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 4 + mova m0, [src1q] + mova m1, [src1q + stride1q] + psadbw m0, [src2q] + psadbw m1, [src2q + stride2q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_MMX mmxext +cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 16 + mova m0, [src1q] + mova m1, [src1q + 8] + psadbw m0, [src2q] + psadbw m1, [src2q + 8] + paddw m2, m0 + paddw m2, m1 + add src1q, stride1q + add src2q, stride2q +%endrep + movd eax, m2 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +INIT_XMM sse2 +cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 + pxor m4, m4 +%rep 8 + movu m0, [src1q] + movu m1, [src1q + stride1q] + movu m2, [src2q] + movu m3, [src2q + stride2q] + psadbw m0, m2 + psadbw m1, m3 + paddw m4, m0 + paddw m4, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m4 + paddw m4, m0 + movd eax, m4 + RET + +;------------------------------------------------------------------------------- +; int ff_pixelutils_sad_[au]_16x16_sse(const uint8_t *src1, ptrdiff_t stride1, +; const uint8_t *src2, ptrdiff_t stride2); +;------------------------------------------------------------------------------- +%macro SAD_XMM_16x16 1 +INIT_XMM sse2 +cglobal pixelutils_sad_%1_16x16, 4,4,3, src1, stride1, src2, stride2 + pxor m2, m2 +%rep 8 + mov%1 m0, [src2q] + mov%1 m1, [src2q + stride2q] + psadbw m0, [src1q] + psadbw m1, [src1q + stride1q] + paddw m2, m0 + paddw m2, m1 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] +%endrep + movhlps m0, m2 + paddw m2, m0 + movd eax, m2 + RET +%endmacro + +SAD_XMM_16x16 a +SAD_XMM_16x16 u diff --git a/libavutil/x86/pixelutils.h b/libavutil/x86/pixelutils.h new file mode 100644 index 0000000000..876cf46053 --- /dev/null +++ b/libavutil/x86/pixelutils.h @@ -0,0 +1,26 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_X86_PIXELUTILS_H +#define AVUTIL_X86_PIXELUTILS_H + +#include "libavutil/pixelutils.h" + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned); + +#endif /* AVUTIL_X86_PIXELUTILS_H */ diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c new file mode 100644 index 0000000000..d60051067a --- /dev/null +++ b/libavutil/x86/pixelutils_init.c @@ -0,0 +1,58 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "pixelutils.h" +#include "cpu.h" + +int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmx; + } + + if (EXTERNAL_MMXEXT(cpu_flags)) { + sad[2] = ff_pixelutils_sad_8x8_mmxext; + sad[3] = ff_pixelutils_sad_16x16_mmxext; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (aligned) { + case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned + case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned + case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned + } + } +} |