diff options
author | Diego Biurrun <diego@biurrun.de> | 2014-01-14 10:33:47 +0100 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2014-06-18 14:07:23 -0700 |
commit | e74433a8e6fc00c8dbde293c97a3e45384c2c1d9 (patch) | |
tree | f975b37a58a7c6e62c84c12349610ce6f40ad4d1 /libavcodec/x86 | |
parent | 869fc416f7c78ed4e397e0208acd1545771c0502 (diff) | |
download | ffmpeg-e74433a8e6fc00c8dbde293c97a3e45384c2c1d9.tar.gz |
dsputil: Split clear_block*/fill_block* off into a separate context
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/Makefile | 1 | ||||
-rw-r--r-- | libavcodec/x86/blockdsp_mmx.c | 120 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_init.c | 17 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 56 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_x86.h | 5 |
5 files changed, 121 insertions, 78 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 5fddf3fb83..222a0ff9eb 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -44,6 +44,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o +MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ x86/idct_mmx_xvid.o \ x86/idct_sse2_xvid.o \ diff --git a/libavcodec/x86/blockdsp_mmx.c b/libavcodec/x86/blockdsp_mmx.c new file mode 100644 index 0000000000..b5294242ab --- /dev/null +++ b/libavcodec/x86/blockdsp_mmx.c @@ -0,0 +1,120 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/internal.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/blockdsp.h" +#include "libavcodec/version.h" + +#if HAVE_INLINE_ASM + +#define CLEAR_BLOCKS(name, n) \ +static void name(int16_t *blocks) \ +{ \ + __asm__ volatile ( \ + "pxor %%mm7, %%mm7 \n\t" \ + "mov %1, %%"REG_a" \n\t" \ + "1: \n\t" \ + "movq %%mm7, (%0, %%"REG_a") \n\t" \ + "movq %%mm7, 8(%0, %%"REG_a") \n\t" \ + "movq %%mm7, 16(%0, %%"REG_a") \n\t" \ + "movq %%mm7, 24(%0, %%"REG_a") \n\t" \ + "add $32, %%"REG_a" \n\t" \ + "js 1b \n\t" \ + :: "r"(((uint8_t *) blocks) + 128 * n), \ + "i"(-128 * n) \ + : "%"REG_a); \ +} +CLEAR_BLOCKS(clear_blocks_mmx, 6) +CLEAR_BLOCKS(clear_block_mmx, 1) + +static void clear_block_sse(int16_t *block) +{ + __asm__ volatile ( + "xorps %%xmm0, %%xmm0 \n" + "movaps %%xmm0, (%0) \n" + "movaps %%xmm0, 16(%0) \n" + "movaps %%xmm0, 32(%0) \n" + "movaps %%xmm0, 48(%0) \n" + "movaps %%xmm0, 64(%0) \n" + "movaps %%xmm0, 80(%0) \n" + "movaps %%xmm0, 96(%0) \n" + "movaps %%xmm0, 112(%0) \n" + :: "r" (block) + : "memory"); +} + +static void clear_blocks_sse(int16_t *blocks) +{ + __asm__ volatile ( + "xorps %%xmm0, %%xmm0 \n" + "mov %1, %%"REG_a" \n" + "1: \n" + "movaps %%xmm0, (%0, %%"REG_a") \n" + "movaps %%xmm0, 16(%0, %%"REG_a") \n" + "movaps %%xmm0, 32(%0, %%"REG_a") \n" + "movaps %%xmm0, 48(%0, %%"REG_a") \n" + "movaps %%xmm0, 64(%0, %%"REG_a") \n" + "movaps %%xmm0, 80(%0, %%"REG_a") \n" + "movaps %%xmm0, 96(%0, %%"REG_a") \n" + "movaps %%xmm0, 112(%0, %%"REG_a") \n" + "add $128, %%"REG_a" \n" + "js 1b \n" + :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6) + : "%"REG_a); +} + +#endif /* HAVE_INLINE_ASM */ + +#if FF_API_XVMC +av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth, + AVCodecContext *avctx) +#else +av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth) +#endif /* FF_API_XVMC */ +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (!high_bit_depth) { + if (INLINE_MMX(cpu_flags)) { + c->clear_block = clear_block_mmx; + c->clear_blocks = clear_blocks_mmx; + } + +#if FF_API_XVMC +FF_DISABLE_DEPRECATION_WARNINGS + /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ + if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1) + return; +FF_ENABLE_DEPRECATION_WARNINGS +#endif /* FF_API_XVMC */ + + if (INLINE_SSE(cpu_flags)) { + c->clear_block = clear_block_sse; + c->clear_blocks = clear_blocks_sse; + } + } +#endif /* HAVE_INLINE_ASM */ +} diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 389e7634dd..a19b83d83c 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -19,12 +19,10 @@ #include "config.h" #include "libavutil/attributes.h" #include "libavutil/cpu.h" -#include "libavutil/internal.h" #include "libavutil/x86/cpu.h" #include "libavcodec/avcodec.h" #include "libavcodec/dsputil.h" #include "libavcodec/simple_idct.h" -#include "libavcodec/version.h" #include "dsputil_x86.h" #include "idct_xvid.h" @@ -54,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, c->add_pixels_clamped = ff_add_pixels_clamped_mmx; if (!high_bit_depth) { - c->clear_block = ff_clear_block_mmx; - c->clear_blocks = ff_clear_blocks_mmx; c->draw_edges = ff_draw_edges_mmx; switch (avctx->idct_algo) { @@ -103,19 +99,6 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, { #if HAVE_SSE_INLINE c->vector_clipf = ff_vector_clipf_sse; - -#if FF_API_XVMC -FF_DISABLE_DEPRECATION_WARNINGS - /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ - if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1) - return; -FF_ENABLE_DEPRECATION_WARNINGS -#endif /* FF_API_XVMC */ - - if (!high_bit_depth) { - c->clear_block = ff_clear_block_sse; - c->clear_blocks = ff_clear_blocks_sse; - } #endif /* HAVE_SSE_INLINE */ } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index c17f8d00d5..fd74efeb3d 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -166,62 +166,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, } while (--i); } -#define CLEAR_BLOCKS(name, n) \ -void name(int16_t *blocks) \ -{ \ - __asm__ volatile ( \ - "pxor %%mm7, %%mm7 \n\t" \ - "mov %1, %%"REG_a" \n\t" \ - "1: \n\t" \ - "movq %%mm7, (%0, %%"REG_a") \n\t" \ - "movq %%mm7, 8(%0, %%"REG_a") \n\t" \ - "movq %%mm7, 16(%0, %%"REG_a") \n\t" \ - "movq %%mm7, 24(%0, %%"REG_a") \n\t" \ - "add $32, %%"REG_a" \n\t" \ - "js 1b \n\t" \ - :: "r"(((uint8_t *) blocks) + 128 * n), \ - "i"(-128 * n) \ - : "%"REG_a); \ -} -CLEAR_BLOCKS(ff_clear_blocks_mmx, 6) -CLEAR_BLOCKS(ff_clear_block_mmx, 1) - -void ff_clear_block_sse(int16_t *block) -{ - __asm__ volatile ( - "xorps %%xmm0, %%xmm0 \n" - "movaps %%xmm0, (%0) \n" - "movaps %%xmm0, 16(%0) \n" - "movaps %%xmm0, 32(%0) \n" - "movaps %%xmm0, 48(%0) \n" - "movaps %%xmm0, 64(%0) \n" - "movaps %%xmm0, 80(%0) \n" - "movaps %%xmm0, 96(%0) \n" - "movaps %%xmm0, 112(%0) \n" - :: "r" (block) - : "memory"); -} - -void ff_clear_blocks_sse(int16_t *blocks) -{ - __asm__ volatile ( - "xorps %%xmm0, %%xmm0 \n" - "mov %1, %%"REG_a" \n" - "1: \n" - "movaps %%xmm0, (%0, %%"REG_a") \n" - "movaps %%xmm0, 16(%0, %%"REG_a") \n" - "movaps %%xmm0, 32(%0, %%"REG_a") \n" - "movaps %%xmm0, 48(%0, %%"REG_a") \n" - "movaps %%xmm0, 64(%0, %%"REG_a") \n" - "movaps %%xmm0, 80(%0, %%"REG_a") \n" - "movaps %%xmm0, 96(%0, %%"REG_a") \n" - "movaps %%xmm0, 112(%0, %%"REG_a") \n" - "add $128, %%"REG_a" \n" - "js 1b \n" - :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6) - : "%"REG_a); -} - /* Draw the edges of width 'w' of an image of size width, height * this MMX version can only handle w == 8 || w == 16. */ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index a4bc8c2730..e99b6b7630 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -38,11 +38,6 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size); -void ff_clear_block_mmx(int16_t *block); -void ff_clear_block_sse(int16_t *block); -void ff_clear_blocks_mmx(int16_t *blocks); -void ff_clear_blocks_sse(int16_t *blocks); - void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides); |