diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-22 14:54:06 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-22 15:33:23 +0100 |
commit | 26345acb0e723d28aa28e09126ea383b2f679f5b (patch) | |
tree | f8d7a9f62d7557aca5ad56dd244fa415fdab56f2 | |
parent | 9774251c45eadfe0ba7b0381344451fa8840cc0d (diff) | |
parent | 0881cbf314982cce8448bd12644ce2a6e0b8c576 (diff) | |
download | ffmpeg-26345acb0e723d28aa28e09126ea383b2f679f5b.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
proresdec: support mixed interlaced/non-interlaced content
vp3/5: move put_no_rnd_pixels_l2 from dsputil to VP3DSPContext.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/dsputil.c | 1 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 2 | ||||
-rw-r--r-- | libavcodec/dsputil_template.c | 4 | ||||
-rw-r--r-- | libavcodec/proresdec_lgpl.c | 2 | ||||
-rw-r--r-- | libavcodec/vp3.c | 2 | ||||
-rw-r--r-- | libavcodec/vp3dsp.c | 19 | ||||
-rw-r--r-- | libavcodec/vp3dsp.h | 16 | ||||
-rw-r--r-- | libavcodec/vp56.c | 6 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 35 | ||||
-rw-r--r-- | libavcodec/x86/vp3dsp_init.c | 61 |
10 files changed, 102 insertions, 46 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index ffb508535f..c52c8f1703 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2946,7 +2946,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ - c->put_no_rnd_pixels_l2 = FUNCC(put_no_rnd_pixels8_l2 , depth);\ \ c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\ c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\ diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 5bac9dfd2c..8c52e5118a 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -308,8 +308,6 @@ typedef struct DSPContext { */ op_pixels_func avg_no_rnd_pixels_tab[4][4]; - void (*put_no_rnd_pixels_l2)(uint8_t *block/*align 8*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); - /** * Thirdpel motion compensation with rounding (a+b+1)>>1. * this is an array[12] of motion compensation functions for the 9 thirdpe diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c index 26167d808f..5162304d05 100644 --- a/libavcodec/dsputil_template.c +++ b/libavcodec/dsputil_template.c @@ -582,10 +582,6 @@ PIXOP2(put, op_put) #define put_no_rnd_pixels8_c put_pixels8_c #define put_no_rnd_pixels16_c put_pixels16_c -static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h); -} - #define H264_CHROMA_MC(OPNAME, OP)\ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\ pixel *dst = (pixel*)p_dst;\ diff --git a/libavcodec/proresdec_lgpl.c b/libavcodec/proresdec_lgpl.c index 4fa2817e86..a25ecca3c9 100644 --- a/libavcodec/proresdec_lgpl.c +++ b/libavcodec/proresdec_lgpl.c @@ -164,6 +164,8 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf, if (ctx->frame_type) { /* if interlaced */ ctx->picture.interlaced_frame = 1; ctx->picture.top_field_first = ctx->frame_type & 1; + } else { + ctx->picture.interlaced_frame = 0; } avctx->color_primaries = buf[14]; diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index 7b470dcccf..11436e5772 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -1570,7 +1570,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) motion_source, stride, 8); }else{ int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 - s->dsp.put_no_rnd_pixels_l2( + s->vp3dsp.put_no_rnd_pixels_l2( output_plane + first_pixel, motion_source - d, motion_source + stride + 1 + d, diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index 6502a158e6..6db80c8d00 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -274,8 +274,27 @@ static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, } } +static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1, + const uint8_t *src2, ptrdiff_t stride, int h) +{ + int i; + + for (i = 0; i < h; i++) { + uint32_t a, b; + + a = AV_RN32A(&src1[i * stride]); + b = AV_RN32A(&src2[i * stride]); + AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b)); + a = AV_RN32A(&src1[i * stride + 4]); + b = AV_RN32A(&src2[i * stride + 4]); + AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b)); + } +} + av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) { + c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2; + c->idct_put = vp3_idct_put_c; c->idct_add = vp3_idct_add_c; c->idct_dc_add = vp3_idct_dc_add_c; diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h index bc651997d2..35bd95c684 100644 --- a/libavcodec/vp3dsp.h +++ b/libavcodec/vp3dsp.h @@ -19,10 +19,26 @@ #ifndef AVCODEC_VP3DSP_H #define AVCODEC_VP3DSP_H +#include <stddef.h> #include <stdint.h> #include "dsputil.h" typedef struct VP3DSPContext { + /** + * Copy 8xH pixels from source to destination buffer using a bilinear + * filter with no rounding (i.e. *dst = (*a + *b) >> 1). + * + * @param dst destination buffer, aligned by 8 + * @param a first source buffer, no alignment + * @param b second source buffer, no alignment + * @param stride distance between two lines in source/dest buffers + * @param h height + */ + void (*put_no_rnd_pixels_l2)(uint8_t *dst, + const uint8_t *a, + const uint8_t *b, + ptrdiff_t stride, int h); + void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block); void (*idct_dc_add)(uint8_t *dest, int line_size, DCTELEM *block); diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c index 13bbb02481..7df5adade3 100644 --- a/libavcodec/vp56.c +++ b/libavcodec/vp56.c @@ -373,9 +373,9 @@ static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src, s->filter(s, dst, src_block, src_offset, src_offset+overlap_offset, stride, s->mv[b], mask, s->filter_selection, b<4); else - s->dsp.put_no_rnd_pixels_l2(dst, src_block+src_offset, - src_block+src_offset+overlap_offset, - stride, 8); + s->vp3dsp.put_no_rnd_pixels_l2(dst, src_block+src_offset, + src_block+src_offset+overlap_offset, + stride, 8); } else { s->dsp.put_pixels_tab[1][0](dst, src_block+src_offset, stride, 8); } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 1e0f8a168c..3bc3d61c89 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1839,39 +1839,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, avg_pixels8_mmxext(dst, src, stride, 8); } -/* only used in VP3/5/6 */ -static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h) -{ -// START_TIMER - MOVQ_BFE(mm6); - __asm__ volatile( - "1: \n\t" - "movq (%1), %%mm0 \n\t" - "movq (%2), %%mm1 \n\t" - "movq (%1,%4), %%mm2 \n\t" - "movq (%2,%4), %%mm3 \n\t" - PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, (%3) \n\t" - "movq %%mm5, (%3,%4) \n\t" - - "movq (%1,%4,2), %%mm0 \n\t" - "movq (%2,%4,2), %%mm1 \n\t" - "movq (%1,%5), %%mm2 \n\t" - "movq (%2,%5), %%mm3 \n\t" - "lea (%1,%4,4), %1 \n\t" - "lea (%2,%4,4), %2 \n\t" - PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) - "movq %%mm4, (%3,%4,2) \n\t" - "movq %%mm5, (%3,%5) \n\t" - "lea (%3,%4,4), %3 \n\t" - "subl $4, %0 \n\t" - "jnz 1b \n\t" - :"+r"(h), "+r"(a), "+r"(b), "+r"(dst) - :"r"((x86_reg)stride), "r"((x86_reg)3L*stride) - :"memory"); -// STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx") -} - #if CONFIG_DIRAC_DECODER #define DIRAC_PIXOP(OPNAME, EXT)\ void ff_ ## OPNAME ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ @@ -2115,8 +2082,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) c->add_bytes = add_bytes_mmx; - c->put_no_rnd_pixels_l2= put_vp_no_rnd_pixels8_l2_mmx; - if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { c->h263_v_loop_filter = h263_v_loop_filter_mmx; c->h263_h_loop_filter = h263_h_loop_filter_mmx; diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c index 245b60f6fa..e425f503ac 100644 --- a/libavcodec/x86/vp3dsp_init.c +++ b/libavcodec/x86/vp3dsp_init.c @@ -1,4 +1,6 @@ /* + * Copyright (c) 2009 David Conrad <lessen42@gmail.com> + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -21,6 +23,7 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/x86/cpu.h" +#include "libavutil/x86/asm.h" #include "libavcodec/avcodec.h" #include "libavcodec/vp3dsp.h" #include "config.h" @@ -39,10 +42,68 @@ void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride, void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride, int *bounding_values); +#if HAVE_INLINE_ASM + +#define MOVQ_BFE(regd) \ + __asm__ volatile ( \ + "pcmpeqd %%"#regd", %%"#regd" \n\t" \ + "paddb %%"#regd", %%"#regd" \n\t" ::) + +#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ + "movq "#rega", "#regr" \n\t" \ + "movq "#regc", "#regp" \n\t" \ + "pand "#regb", "#regr" \n\t" \ + "pand "#regd", "#regp" \n\t" \ + "pxor "#rega", "#regb" \n\t" \ + "pxor "#regc", "#regd" \n\t" \ + "pand %%mm6, "#regb" \n\t" \ + "pand %%mm6, "#regd" \n\t" \ + "psrlq $1, "#regb" \n\t" \ + "psrlq $1, "#regd" \n\t" \ + "paddb "#regb", "#regr" \n\t" \ + "paddb "#regd", "#regp" \n\t" + +static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h) +{ +// START_TIMER + MOVQ_BFE(mm6); + __asm__ volatile( + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "movq (%1,%4), %%mm2 \n\t" + "movq (%2,%4), %%mm3 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "movq %%mm5, (%3,%4) \n\t" + + "movq (%1,%4,2), %%mm0 \n\t" + "movq (%2,%4,2), %%mm1 \n\t" + "movq (%1,%5), %%mm2 \n\t" + "movq (%2,%5), %%mm3 \n\t" + "lea (%1,%4,4), %1 \n\t" + "lea (%2,%4,4), %2 \n\t" + PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3,%4,2) \n\t" + "movq %%mm5, (%3,%5) \n\t" + "lea (%3,%4,4), %3 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+r"(h), "+r"(a), "+r"(b), "+r"(dst) + :"r"((x86_reg)stride), "r"((x86_reg)3L*stride) + :"memory"); +// STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx") +} +#endif /* HAVE_INLINE_ASM */ + av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) { int cpuflags = av_get_cpu_flags(); +#if HAVE_INLINE_ASM + c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx; +#endif /* HAVE_INLINE_ASM */ + #if ARCH_X86_32 if (EXTERNAL_MMX(cpuflags)) { c->idct_put = ff_vp3_idct_put_mmx; |