diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-07-07 15:54:17 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-07 16:17:27 +0200 |
commit | 3790801f9cedfae81cb8f055bbb0b14131459658 (patch) | |
tree | 4fb4e35103252ce0a7cecf1d01041db506078894 | |
parent | 020865f557ccf06a41ecc461fd13ce6678817d04 (diff) | |
parent | 3c650efb81aaa3b395ba4606ee68a47ee4efb57b (diff) | |
download | ffmpeg-3790801f9cedfae81cb8f055bbb0b14131459658.tar.gz |
Merge commit '3c650efb81aaa3b395ba4606ee68a47ee4efb57b'
* commit '3c650efb81aaa3b395ba4606ee68a47ee4efb57b':
dsputil: Move draw_edges() to mpegvideoencdsp
Conflicts:
libavcodec/mpegvideo_enc.c
libavcodec/x86/Makefile
libavcodec/x86/dsputil_init.c
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_x86.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/diracdec.c | 12 | ||||
-rw-r--r-- | libavcodec/dsputil.c | 30 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 6 | ||||
-rw-r--r-- | libavcodec/mpegvideo.h | 2 | ||||
-rw-r--r-- | libavcodec/mpegvideo_enc.c | 41 | ||||
-rw-r--r-- | libavcodec/mpegvideoencdsp.c | 31 | ||||
-rw-r--r-- | libavcodec/mpegvideoencdsp.h | 6 | ||||
-rw-r--r-- | libavcodec/snow.c | 19 | ||||
-rw-r--r-- | libavcodec/snow.h | 1 | ||||
-rw-r--r-- | libavcodec/snowenc.c | 8 | ||||
-rw-r--r-- | libavcodec/utils.c | 1 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 1 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_init.c | 16 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 150 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_x86.h | 3 | ||||
-rw-r--r-- | libavcodec/x86/mpegvideoencdsp_init.c | 119 |
16 files changed, 206 insertions, 240 deletions
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c index d7cca7e0da..d1c3758cb4 100644 --- a/libavcodec/diracdec.c +++ b/libavcodec/diracdec.c @@ -34,6 +34,8 @@ #include "golomb.h" #include "dirac_arith.h" #include "mpeg12data.h" +#include "libavcodec/mpegvideo.h" +#include "mpegvideoencdsp.h" #include "dirac_dwt.h" #include "dirac.h" #include "diracdsp.h" @@ -137,6 +139,7 @@ typedef struct Plane { typedef struct DiracContext { AVCodecContext *avctx; DSPContext dsp; + MpegvideoEncDSPContext mpvencdsp; DiracDSPContext diracdsp; GetBitContext gb; dirac_source_params source; @@ -424,6 +427,7 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx) ff_dsputil_init(&s->dsp, avctx); ff_diracdsp_init(&s->diracdsp); + ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); for (i = 0; i < MAX_FRAMES; i++) { s->all_frames[i].avframe = av_frame_alloc(); @@ -1556,7 +1560,7 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in int i, edge = EDGE_WIDTH/2; ref->hpel[plane][0] = ref->avframe->data[plane]; - s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ + s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ /* no need for hpel if we only have fpel vectors */ if (!s->mv_precision) @@ -1573,9 +1577,9 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2], ref->hpel[plane][3], ref->hpel[plane][0], ref->avframe->linesize[plane], width, height); - s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); } ref->interpolated[plane] = 1; } diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 640a4bfa9c..5d7fbb1126 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -937,34 +937,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c) -/* draw the edges of width 'w' of an image of size width, height */ -// FIXME: Check that this is OK for MPEG-4 interlaced. -static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr = buf, *last_line; - int i; - - /* left and right */ - for (i = 0; i < height; i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width - 1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for (i = 0; i < h; i++) - // top - memcpy(buf - (i + 1) * wrap, buf, width + w + w); - if (sides & EDGE_BOTTOM) - for (i = 0; i < h; i++) - // bottom - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); -} - /* init static data */ av_cold void ff_dsputil_static_init(void) { @@ -1067,8 +1039,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ff_dsputil_init_dwt(c); #endif - c->draw_edges = draw_edges_8_c; - switch (avctx->bits_per_raw_sample) { case 9: case 10: diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 8dafbbd9d7..0d7dd14734 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -99,12 +99,6 @@ typedef struct DSPContext { /* (I)DCT */ void (*fdct)(int16_t *block /* align 16 */); void (*fdct248)(int16_t *block /* align 16 */); - - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); -#define EDGE_WIDTH 16 -#define EDGE_TOP 1 -#define EDGE_BOTTOM 2 } DSPContext; void ff_dsputil_static_init(void); diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 795a3bd116..cf0c47b4dd 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -81,6 +81,8 @@ enum OutputFormat { #define INPLACE_OFFSET 16 +#define EDGE_WIDTH 16 + /* Start codes. */ #define SEQ_END_CODE 0x000001b7 #define SEQ_START_CODE 0x000001b3 diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 0d630df4ea..d39f88edde 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -1145,11 +1145,11 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg) } } if ((s->width & 15) || (s->height & (vpad-1))) { - s->dsp.draw_edges(dst, dst_stride, - w, h, - 16>>h_shift, - vpad>>v_shift, - EDGE_BOTTOM); + s->mpvencdsp.draw_edges(dst, dst_stride, + w, h, + 16>>h_shift, + vpad>>v_shift, + EDGE_BOTTOM); } } } @@ -1529,18 +1529,25 @@ static void frame_end(MpegEncContext *s) const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt); int hshift = desc->log2_chroma_w; int vshift = desc->log2_chroma_h; - s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0], - s->h_edge_pos, s->v_edge_pos, - EDGE_WIDTH, EDGE_WIDTH, - EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1], - s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, - EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, - EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2], - s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, - EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, - EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture.f->data[0], + s->current_picture.f->linesize[0], + s->h_edge_pos, s->v_edge_pos, + EDGE_WIDTH, EDGE_WIDTH, + EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture.f->data[1], + s->current_picture.f->linesize[1], + s->h_edge_pos >> hshift, + s->v_edge_pos >> vshift, + EDGE_WIDTH >> hshift, + EDGE_WIDTH >> vshift, + EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture.f->data[2], + s->current_picture.f->linesize[2], + s->h_edge_pos >> hshift, + s->v_edge_pos >> vshift, + EDGE_WIDTH >> hshift, + EDGE_WIDTH >> vshift, + EDGE_TOP | EDGE_BOTTOM); } emms_c(); diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c index bde4345750..10ad369a67 100644 --- a/libavcodec/mpegvideoencdsp.c +++ b/libavcodec/mpegvideoencdsp.c @@ -18,6 +18,7 @@ #include <assert.h> #include <stdint.h> +#include <string.h> #include "config.h" #include "libavutil/avassert.h" @@ -125,6 +126,34 @@ static int pix_norm1_c(uint8_t *pix, int line_size) return s; } +/* draw the edges of width 'w' of an image of size width, height */ +// FIXME: Check that this is OK for MPEG-4 interlaced. +static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr = buf, *last_line; + int i; + + /* left and right */ + for (i = 0; i < height; i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width - 1], w); + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for (i = 0; i < h; i++) + // top + memcpy(buf - (i + 1) * wrap, buf, width + w + w); + if (sides & EDGE_BOTTOM) + for (i = 0; i < h; i++) + // bottom + memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); +} + av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, AVCodecContext *avctx) { @@ -139,6 +168,8 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; + c->draw_edges = draw_edges_8_c; + if (ARCH_ARM) ff_mpegvideoencdsp_init_arm(c, avctx); if (ARCH_PPC) diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h index 81e3fe67b0..e12f4c6a39 100644 --- a/libavcodec/mpegvideoencdsp.h +++ b/libavcodec/mpegvideoencdsp.h @@ -26,6 +26,9 @@ #define BASIS_SHIFT 16 #define RECON_SHIFT 6 +#define EDGE_TOP 1 +#define EDGE_BOTTOM 2 + typedef struct MpegvideoEncDSPContext { int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); @@ -36,6 +39,9 @@ typedef struct MpegvideoEncDSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); + + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides); } MpegvideoEncDSPContext; void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, diff --git a/libavcodec/snow.c b/libavcodec/snow.c index e1ed29793d..711d1a4f08 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -433,6 +433,7 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){ ff_videodsp_init(&s->vdsp, 8); ff_dwt_init(&s->dwt); ff_h264qpel_init(&s->h264qpel, 8); + ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); #define mcf(dx,dy)\ s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\ @@ -642,16 +643,16 @@ int ff_snow_frame_start(SnowContext *s){ int h= s->avctx->height; if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) { - s->dsp.draw_edges(s->current_picture->data[0], - s->current_picture->linesize[0], w , h , - EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture->data[0], + s->current_picture->linesize[0], w , h , + EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); if (s->current_picture->data[2]) { - s->dsp.draw_edges(s->current_picture->data[1], - s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, - EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture->data[2], - s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, - EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture->data[1], + s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, + EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture->data[2], + s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, + EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); } } diff --git a/libavcodec/snow.h b/libavcodec/snow.h index a9c8518d75..2cda5b323d 100644 --- a/libavcodec/snow.h +++ b/libavcodec/snow.h @@ -115,6 +115,7 @@ typedef struct SnowContext{ QpelDSPContext qdsp; VideoDSPContext vdsp; H264QpelContext h264qpel; + MpegvideoEncDSPContext mpvencdsp; SnowDWTContext dwt; AVFrame *new_picture; AVFrame *input_picture; ///< new_picture with the internal linesizes diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c index a39cd84403..cb8382154d 100644 --- a/libavcodec/snowenc.c +++ b/libavcodec/snowenc.c @@ -1568,10 +1568,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]], &pict->data[i][y * pict->linesize[i]], width>>hshift); - s->dsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], - width >> hshift, height >> vshift, - EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, - EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], + width >> hshift, height >> vshift, + EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, + EDGE_TOP | EDGE_BOTTOM); } emms_c(); diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 65f8c363b1..9fa8e16042 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -43,6 +43,7 @@ #include "avcodec.h" #include "dsputil.h" #include "libavutil/opt.h" +#include "mpegvideo.h" #include "thread.h" #include "frame_thread_encoder.h" #include "internal.h" diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index ac336c7d86..adcf62746a 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o -MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/fdct.o MMX-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_mmx.o \ diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index c1b110b68d..85c637b620 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -27,25 +27,9 @@ #include "libavcodec/dsputil.h" #include "dsputil_x86.h" -static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_MMX_INLINE - if (!high_bit_depth) { - c->draw_edges = ff_draw_edges_mmx; - } - -#endif /* HAVE_MMX_INLINE */ -} - av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { - int cpu_flags = av_get_cpu_flags(); - - if (X86_MMX(cpu_flags)) - dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c deleted file mode 100644 index dca971e4e5..0000000000 --- a/libavcodec/x86/dsputil_mmx.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * MMX optimized DSP utils - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * MMX optimization by Nick Kurshev <nickols_k@mail.ru> - */ - -#include "config.h" -#include "libavutil/avassert.h" -#include "libavutil/cpu.h" -#include "libavutil/x86/asm.h" -#include "libavcodec/pixels.h" -#include "libavcodec/videodsp.h" -#include "dsputil_x86.h" -#include "inline_asm.h" - -#if HAVE_INLINE_ASM - -/* Draw the edges of width 'w' of an image of size width, height - * this MMX version can only handle w == 8 || w == 16. */ -void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - last_line = buf + (height - 1) * wrap; - /* left and right */ - ptr = buf; - if (w == 8) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } else if (w == 16) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq %%mm0, -16(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "movq %%mm1, 8(%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r"(ptr) - : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) - ); - } else { - av_assert1(w == 4); - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "movd %%mm0, -4(%0) \n\t" - "movd -4(%0, %2), %%mm1 \n\t" - "punpcklbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movd %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } - - /* top and bottom (and hopefully also the corners) */ - if (sides & EDGE_TOP) { - for (i = 0; i < h; i += 4) { - ptr = buf - (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) buf - (x86_reg) ptr - w), - "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), - "r" (ptr + width + 2 * w)); - } - } - - if (sides & EDGE_BOTTOM) { - for (i = 0; i < h; i += 4) { - ptr = last_line + (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) last_line - (x86_reg) ptr - w), - "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), - "r" (ptr + width + 2 * w)); - } - } -} - -#endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index c94d00438b..3e6f364a44 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -31,9 +31,6 @@ void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); -void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); - void ff_mmx_idct(int16_t *block); void ff_mmxext_idct(int16_t *block); diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c index 16841893a4..d91b902187 100644 --- a/libavcodec/x86/mpegvideoencdsp_init.c +++ b/libavcodec/x86/mpegvideoencdsp_init.c @@ -17,6 +17,7 @@ */ #include "libavutil/attributes.h" +#include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/x86/cpu.h" #include "libavcodec/avcodec.h" @@ -96,6 +97,120 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size); #undef PHADDD #endif /* HAVE_SSSE3_INLINE */ +/* Draw the edges of width 'w' of an image of size width, height + * this MMX version can only handle w == 8 || w == 16. */ +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr, *last_line; + int i; + + last_line = buf + (height - 1) * wrap; + /* left and right */ + ptr = buf; + if (w == 8) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } else if (w == 16) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r"(ptr) + : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) + ); + } else { + av_assert1(w == 4); + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "movd %%mm0, -4(%0) \n\t" + "movd -4(%0, %2), %%mm1 \n\t" + "punpcklbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movd %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } + + /* top and bottom (and hopefully also the corners) */ + if (sides & EDGE_TOP) { + for (i = 0; i < h; i += 4) { + ptr = buf - (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) buf - (x86_reg) ptr - w), + "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), + "r" (ptr + width + 2 * w)); + } + } + + if (sides & EDGE_BOTTOM) { + for (i = 0; i < h; i += 4) { + ptr = last_line + (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) last_line - (x86_reg) ptr - w), + "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), + "r" (ptr + width + 2 * w)); + } + } +} + #endif /* HAVE_INLINE_ASM */ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, @@ -124,6 +239,10 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, c->try_8x8basis = try_8x8basis_mmx; } c->add_8x8basis = add_8x8basis_mmx; + + if (avctx->bits_per_raw_sample <= 8) { + c->draw_edges = draw_edges_mmx; + } } if (INLINE_AMD3DNOW(cpu_flags)) { |