diff options
author | Kostya Shishkov <kostya.shishkov@gmail.com> | 2011-08-09 11:00:09 +0200 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-08-11 16:07:15 -0700 |
commit | d241f51e0f7c08060d2fa72117e2a1f273ab0c72 (patch) | |
tree | 59716c878cbf5be19e11f5c7833818f8e1a83bc4 /libavcodec | |
parent | 21d70372341d0e7a1c5ac34c4522850f40c503d5 (diff) | |
download | ffmpeg-d241f51e0f7c08060d2fa72117e2a1f273ab0c72.tar.gz |
Move RV3/4-specific DSP functions into their own context
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/dsputil.c | 28 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 18 | ||||
-rw-r--r-- | libavcodec/rv30dsp.c | 80 | ||||
-rw-r--r-- | libavcodec/rv34.c | 39 | ||||
-rw-r--r-- | libavcodec/rv34.h | 2 | ||||
-rw-r--r-- | libavcodec/rv34dsp.h | 44 | ||||
-rw-r--r-- | libavcodec/rv40dsp.c | 138 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 20 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp.c | 60 |
10 files changed, 253 insertions, 178 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index d31860166d..039cf0b213 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -1280,16 +1280,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int } #if CONFIG_RV40_DECODER -static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ +void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ put_pixels16_xy2_8_c(dst, src, stride, 16); } -static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ +void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ avg_pixels16_xy2_8_c(dst, src, stride, 16); } -static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ +void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ put_pixels8_xy2_8_c(dst, src, stride, 8); } -static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ +void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ avg_pixels8_xy2_8_c(dst, src, stride, 8); } #endif /* CONFIG_RV40_DECODER */ @@ -2903,16 +2903,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER ff_intrax8dsp_init(c,avctx); #endif -#if CONFIG_RV30_DECODER - ff_rv30dsp_init(c,avctx); -#endif -#if CONFIG_RV40_DECODER - ff_rv40dsp_init(c,avctx); - c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; - c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c; - c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c; - c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c; -#endif c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; @@ -3124,16 +3114,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; } - c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0]; - c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0]; - c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0]; - c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0]; - - c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0]; - c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0]; - c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0]; - c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0]; - switch(c->idct_permutation_type){ case FF_NO_IDCT_PERM: for(i=0; i<64; i++) diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 536357a529..4d783cf266 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -114,6 +114,12 @@ void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); /* EA functions */ void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); +/* RV40 functions */ +void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride); +void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride); +void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride); +void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride); + /* 1/2^n downscaling functions from imgconvert.c */ void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); @@ -542,16 +548,6 @@ typedef struct DSPContext { void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); - /* rv30 functions */ - qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; - qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; - - /* rv40 functions */ - qpel_mc_func put_rv40_qpel_pixels_tab[4][16]; - qpel_mc_func avg_rv40_qpel_pixels_tab[4][16]; - h264_chroma_mc_func put_rv40_chroma_pixels_tab[3]; - h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3]; - op_fill_func fill_block_tab[2]; } DSPContext; @@ -626,8 +622,6 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_dwt(DSPContext *c); -void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); -void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/rv30dsp.c b/libavcodec/rv30dsp.c index 4ead774f9e..6ba1a6bfc1 100644 --- a/libavcodec/rv30dsp.c +++ b/libavcodec/rv30dsp.c @@ -26,6 +26,7 @@ #include "avcodec.h" #include "dsputil.h" +#include "rv34dsp.h" #define RV30_LOWPASS(OPNAME, OP) \ static av_unused void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\ @@ -251,41 +252,46 @@ RV30_MC(put_, 16) RV30_MC(avg_, 8) RV30_MC(avg_, 16) -av_cold void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx) { - c->put_rv30_tpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0]; - c->put_rv30_tpel_pixels_tab[0][ 1] = put_rv30_tpel16_mc10_c; - c->put_rv30_tpel_pixels_tab[0][ 2] = put_rv30_tpel16_mc20_c; - c->put_rv30_tpel_pixels_tab[0][ 4] = put_rv30_tpel16_mc01_c; - c->put_rv30_tpel_pixels_tab[0][ 5] = put_rv30_tpel16_mc11_c; - c->put_rv30_tpel_pixels_tab[0][ 6] = put_rv30_tpel16_mc21_c; - c->put_rv30_tpel_pixels_tab[0][ 8] = put_rv30_tpel16_mc02_c; - c->put_rv30_tpel_pixels_tab[0][ 9] = put_rv30_tpel16_mc12_c; - c->put_rv30_tpel_pixels_tab[0][10] = put_rv30_tpel16_mc22_c; - c->avg_rv30_tpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0]; - c->avg_rv30_tpel_pixels_tab[0][ 1] = avg_rv30_tpel16_mc10_c; - c->avg_rv30_tpel_pixels_tab[0][ 2] = avg_rv30_tpel16_mc20_c; - c->avg_rv30_tpel_pixels_tab[0][ 4] = avg_rv30_tpel16_mc01_c; - c->avg_rv30_tpel_pixels_tab[0][ 5] = avg_rv30_tpel16_mc11_c; - c->avg_rv30_tpel_pixels_tab[0][ 6] = avg_rv30_tpel16_mc21_c; - c->avg_rv30_tpel_pixels_tab[0][ 8] = avg_rv30_tpel16_mc02_c; - c->avg_rv30_tpel_pixels_tab[0][ 9] = avg_rv30_tpel16_mc12_c; - c->avg_rv30_tpel_pixels_tab[0][10] = avg_rv30_tpel16_mc22_c; - c->put_rv30_tpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0]; - c->put_rv30_tpel_pixels_tab[1][ 1] = put_rv30_tpel8_mc10_c; - c->put_rv30_tpel_pixels_tab[1][ 2] = put_rv30_tpel8_mc20_c; - c->put_rv30_tpel_pixels_tab[1][ 4] = put_rv30_tpel8_mc01_c; - c->put_rv30_tpel_pixels_tab[1][ 5] = put_rv30_tpel8_mc11_c; - c->put_rv30_tpel_pixels_tab[1][ 6] = put_rv30_tpel8_mc21_c; - c->put_rv30_tpel_pixels_tab[1][ 8] = put_rv30_tpel8_mc02_c; - c->put_rv30_tpel_pixels_tab[1][ 9] = put_rv30_tpel8_mc12_c; - c->put_rv30_tpel_pixels_tab[1][10] = put_rv30_tpel8_mc22_c; - c->avg_rv30_tpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0]; - c->avg_rv30_tpel_pixels_tab[1][ 1] = avg_rv30_tpel8_mc10_c; - c->avg_rv30_tpel_pixels_tab[1][ 2] = avg_rv30_tpel8_mc20_c; - c->avg_rv30_tpel_pixels_tab[1][ 4] = avg_rv30_tpel8_mc01_c; - c->avg_rv30_tpel_pixels_tab[1][ 5] = avg_rv30_tpel8_mc11_c; - c->avg_rv30_tpel_pixels_tab[1][ 6] = avg_rv30_tpel8_mc21_c; - c->avg_rv30_tpel_pixels_tab[1][ 8] = avg_rv30_tpel8_mc02_c; - c->avg_rv30_tpel_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c; - c->avg_rv30_tpel_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c; +av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) { + c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; + c->put_pixels_tab[0][ 1] = put_rv30_tpel16_mc10_c; + c->put_pixels_tab[0][ 2] = put_rv30_tpel16_mc20_c; + c->put_pixels_tab[0][ 4] = put_rv30_tpel16_mc01_c; + c->put_pixels_tab[0][ 5] = put_rv30_tpel16_mc11_c; + c->put_pixels_tab[0][ 6] = put_rv30_tpel16_mc21_c; + c->put_pixels_tab[0][ 8] = put_rv30_tpel16_mc02_c; + c->put_pixels_tab[0][ 9] = put_rv30_tpel16_mc12_c; + c->put_pixels_tab[0][10] = put_rv30_tpel16_mc22_c; + c->avg_pixels_tab[0][ 0] = dsp->avg_h264_qpel_pixels_tab[0][0]; + c->avg_pixels_tab[0][ 1] = avg_rv30_tpel16_mc10_c; + c->avg_pixels_tab[0][ 2] = avg_rv30_tpel16_mc20_c; + c->avg_pixels_tab[0][ 4] = avg_rv30_tpel16_mc01_c; + c->avg_pixels_tab[0][ 5] = avg_rv30_tpel16_mc11_c; + c->avg_pixels_tab[0][ 6] = avg_rv30_tpel16_mc21_c; + c->avg_pixels_tab[0][ 8] = avg_rv30_tpel16_mc02_c; + c->avg_pixels_tab[0][ 9] = avg_rv30_tpel16_mc12_c; + c->avg_pixels_tab[0][10] = avg_rv30_tpel16_mc22_c; + c->put_pixels_tab[1][ 0] = dsp->put_h264_qpel_pixels_tab[1][0]; + c->put_pixels_tab[1][ 1] = put_rv30_tpel8_mc10_c; + c->put_pixels_tab[1][ 2] = put_rv30_tpel8_mc20_c; + c->put_pixels_tab[1][ 4] = put_rv30_tpel8_mc01_c; + c->put_pixels_tab[1][ 5] = put_rv30_tpel8_mc11_c; + c->put_pixels_tab[1][ 6] = put_rv30_tpel8_mc21_c; + c->put_pixels_tab[1][ 8] = put_rv30_tpel8_mc02_c; + c->put_pixels_tab[1][ 9] = put_rv30_tpel8_mc12_c; + c->put_pixels_tab[1][10] = put_rv30_tpel8_mc22_c; + c->avg_pixels_tab[1][ 0] = dsp->avg_h264_qpel_pixels_tab[1][0]; + c->avg_pixels_tab[1][ 1] = avg_rv30_tpel8_mc10_c; + c->avg_pixels_tab[1][ 2] = avg_rv30_tpel8_mc20_c; + c->avg_pixels_tab[1][ 4] = avg_rv30_tpel8_mc01_c; + c->avg_pixels_tab[1][ 5] = avg_rv30_tpel8_mc11_c; + c->avg_pixels_tab[1][ 6] = avg_rv30_tpel8_mc21_c; + c->avg_pixels_tab[1][ 8] = avg_rv30_tpel8_mc02_c; + c->avg_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c; + c->avg_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c; + + c->put_chroma_pixels_tab[0] = dsp->put_h264_chroma_pixels_tab[0]; + c->put_chroma_pixels_tab[1] = dsp->put_h264_chroma_pixels_tab[1]; + c->avg_chroma_pixels_tab[0] = dsp->avg_h264_chroma_pixels_tab[0]; + c->avg_chroma_pixels_tab[1] = dsp->avg_h264_chroma_pixels_tab[1]; } diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index f9773cdc45..f8192a8f8f 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -809,24 +809,18 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type, const int width, const int height, int dir) { rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, - r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab - : r->s.dsp.put_rv40_qpel_pixels_tab, - r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab - : r->s.dsp.put_rv40_chroma_pixels_tab); + r->rdsp.put_pixels_tab, + r->rdsp.put_chroma_pixels_tab); } static void rv34_mc_2mv(RV34DecContext *r, const int block_type) { rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, - r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab - : r->s.dsp.put_rv40_qpel_pixels_tab, - r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab - : r->s.dsp.put_rv40_chroma_pixels_tab); + r->rdsp.put_pixels_tab, + r->rdsp.put_chroma_pixels_tab); rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, - r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab - : r->s.dsp.avg_rv40_qpel_pixels_tab, - r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab - : r->s.dsp.avg_rv40_chroma_pixels_tab); + r->rdsp.avg_pixels_tab, + r->rdsp.avg_chroma_pixels_tab); } static void rv34_mc_2mv_skip(RV34DecContext *r) @@ -835,15 +829,11 @@ static void rv34_mc_2mv_skip(RV34DecContext *r) for(j = 0; j < 2; j++) for(i = 0; i < 2; i++){ rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30, - r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab - : r->s.dsp.put_rv40_qpel_pixels_tab, - r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab - : r->s.dsp.put_rv40_chroma_pixels_tab); + r->rdsp.put_pixels_tab, + r->rdsp.put_chroma_pixels_tab); rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30, - r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab - : r->s.dsp.avg_rv40_qpel_pixels_tab, - r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab - : r->s.dsp.avg_rv40_chroma_pixels_tab); + r->rdsp.avg_pixels_tab, + r->rdsp.avg_chroma_pixels_tab); } } @@ -1363,6 +1353,15 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx) ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8); +#if CONFIG_RV30_DECODER + if (avctx->codec_id == CODEC_ID_RV30) + ff_rv30dsp_init(&r->rdsp, &r->s.dsp); +#endif +#if CONFIG_RV40_DECODER + if (avctx->codec_id == CODEC_ID_RV40) + ff_rv40dsp_init(&r->rdsp, &r->s.dsp); +#endif + r->intra_types_stride = 4*s->mb_stride + 4; r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist)); r->intra_types = r->intra_types_hist + r->intra_types_stride * 4; diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h index c9f4ff7a13..811afb4a80 100644 --- a/libavcodec/rv34.h +++ b/libavcodec/rv34.h @@ -32,6 +32,7 @@ #include "mpegvideo.h" #include "h264pred.h" +#include "rv34dsp.h" #define MB_TYPE_SEPARATE_DC 0x01000000 #define IS_SEPARATE_DC(a) ((a) & MB_TYPE_SEPARATE_DC) @@ -83,6 +84,7 @@ typedef struct SliceInfo{ /** decoder context */ typedef struct RV34DecContext{ MpegEncContext s; + RV34DSPContext rdsp; int8_t *intra_types_hist;///< old block types, used for prediction int8_t *intra_types; ///< block types int intra_types_stride;///< block types array stride diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h new file mode 100644 index 0000000000..771a6c0f08 --- /dev/null +++ b/libavcodec/rv34dsp.h @@ -0,0 +1,44 @@ +/* + * RV30/40 decoder motion compensation functions + * Copyright (c) 2008 Konstantin Shishkov + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * RV30/40 decoder motion compensation functions + */ + +#ifndef AVCODEC_RV34DSP_H +#define AVCODEC_RV34DSP_H + +#include "dsputil.h" + +typedef struct RV34DSPContext { + qpel_mc_func put_pixels_tab[4][16]; + qpel_mc_func avg_pixels_tab[4][16]; + h264_chroma_mc_func put_chroma_pixels_tab[3]; + h264_chroma_mc_func avg_chroma_pixels_tab[3]; +} RV34DSPContext; + +void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp); +void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp); + +void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp); + +#endif /* AVCODEC_RV34DSP_H */ diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c index 77f2002684..132f063dfa 100644 --- a/libavcodec/rv40dsp.c +++ b/libavcodec/rv40dsp.c @@ -26,6 +26,7 @@ #include "avcodec.h" #include "dsputil.h" +#include "rv34dsp.h" #define RV40_LOWPASS(OPNAME, OP) \ static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\ @@ -284,70 +285,77 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a RV40_CHROMA_MC(put_, op_put) RV40_CHROMA_MC(avg_, op_avg) -void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx) { - c->put_rv40_qpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0]; - c->put_rv40_qpel_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; - c->put_rv40_qpel_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c; - c->put_rv40_qpel_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c; - c->put_rv40_qpel_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c; - c->put_rv40_qpel_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c; - c->put_rv40_qpel_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c; - c->put_rv40_qpel_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c; - c->put_rv40_qpel_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c; - c->put_rv40_qpel_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c; - c->put_rv40_qpel_pixels_tab[0][10] = put_rv40_qpel16_mc22_c; - c->put_rv40_qpel_pixels_tab[0][11] = put_rv40_qpel16_mc32_c; - c->put_rv40_qpel_pixels_tab[0][12] = put_rv40_qpel16_mc03_c; - c->put_rv40_qpel_pixels_tab[0][13] = put_rv40_qpel16_mc13_c; - c->put_rv40_qpel_pixels_tab[0][14] = put_rv40_qpel16_mc23_c; - c->avg_rv40_qpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0]; - c->avg_rv40_qpel_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c; - c->avg_rv40_qpel_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c; - c->avg_rv40_qpel_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c; - c->avg_rv40_qpel_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c; - c->avg_rv40_qpel_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c; - c->avg_rv40_qpel_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c; - c->avg_rv40_qpel_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c; - c->avg_rv40_qpel_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c; - c->avg_rv40_qpel_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c; - c->avg_rv40_qpel_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c; - c->avg_rv40_qpel_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c; - c->avg_rv40_qpel_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c; - c->avg_rv40_qpel_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c; - c->avg_rv40_qpel_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c; - c->put_rv40_qpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0]; - c->put_rv40_qpel_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c; - c->put_rv40_qpel_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c; - c->put_rv40_qpel_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c; - c->put_rv40_qpel_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c; - c->put_rv40_qpel_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c; - c->put_rv40_qpel_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c; - c->put_rv40_qpel_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c; - c->put_rv40_qpel_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c; - c->put_rv40_qpel_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c; - c->put_rv40_qpel_pixels_tab[1][10] = put_rv40_qpel8_mc22_c; - c->put_rv40_qpel_pixels_tab[1][11] = put_rv40_qpel8_mc32_c; - c->put_rv40_qpel_pixels_tab[1][12] = put_rv40_qpel8_mc03_c; - c->put_rv40_qpel_pixels_tab[1][13] = put_rv40_qpel8_mc13_c; - c->put_rv40_qpel_pixels_tab[1][14] = put_rv40_qpel8_mc23_c; - c->avg_rv40_qpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0]; - c->avg_rv40_qpel_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c; - c->avg_rv40_qpel_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c; - c->avg_rv40_qpel_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c; - c->avg_rv40_qpel_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c; - c->avg_rv40_qpel_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c; - c->avg_rv40_qpel_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c; - c->avg_rv40_qpel_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c; - c->avg_rv40_qpel_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c; - c->avg_rv40_qpel_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c; - c->avg_rv40_qpel_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c; - c->avg_rv40_qpel_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c; - c->avg_rv40_qpel_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c; - c->avg_rv40_qpel_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c; - c->avg_rv40_qpel_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c; +av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { + c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; + c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; + c->put_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c; + c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c; + c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c; + c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c; + c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c; + c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c; + c->put_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c; + c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c; + c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c; + c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c; + c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c; + c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c; + c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c; + c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_c; + c->avg_pixels_tab[0][ 0] = dsp->avg_h264_qpel_pixels_tab[0][0]; + c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c; + c->avg_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c; + c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c; + c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c; + c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c; + c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c; + c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c; + c->avg_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c; + c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c; + c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c; + c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c; + c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c; + c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c; + c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c; + c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_c; + c->put_pixels_tab[1][ 0] = dsp->put_h264_qpel_pixels_tab[1][0]; + c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c; + c->put_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c; + c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c; + c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c; + c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c; + c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c; + c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c; + c->put_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c; + c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c; + c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c; + c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c; + c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c; + c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c; + c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c; + c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_c; + c->avg_pixels_tab[1][ 0] = dsp->avg_h264_qpel_pixels_tab[1][0]; + c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c; + c->avg_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c; + c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c; + c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c; + c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c; + c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c; + c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c; + c->avg_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c; + c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c; + c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c; + c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c; + c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c; + c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c; + c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c; + c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_c; - c->put_rv40_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c; - c->put_rv40_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c; - c->avg_rv40_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; - c->avg_rv40_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; + c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c; + c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c; + c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; + c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; + + if (HAVE_MMX) + ff_rv40dsp_init_x86(c, dsp); } diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index d3cf0da72b..a94f97a270 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -21,6 +21,8 @@ YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \ x86/h264_intrapred_10bit.o MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o +MMX-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o \ + YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o MMX-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_mmx.o diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 9909fdab78..0cd9601853 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1895,29 +1895,17 @@ PREFETCH(prefetch_3dnow, prefetch) void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); @@ -2573,9 +2561,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; } - c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; - c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; - c->vector_clip_int32 = ff_vector_clip_int32_mmx; #endif @@ -2675,9 +2660,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); #if HAVE_YASM - c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; - c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; - if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; @@ -2760,8 +2742,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; } - c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; - c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; #endif } diff --git a/libavcodec/x86/rv40dsp.c b/libavcodec/x86/rv40dsp.c new file mode 100644 index 0000000000..9f90ad8bb6 --- /dev/null +++ b/libavcodec/x86/rv40dsp.c @@ -0,0 +1,60 @@ +/* + * RV40 decoder motion compensation functions x86-optimised + * Copyright (c) 2008 Konstantin Shishkov + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * RV40 decoder motion compensation functions x86-optimised + */ + +#include "libavcodec/rv34dsp.h" + +void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) +{ + av_unused int mm_flags = av_get_cpu_flags(); + +#if HAVE_YASM + if (mm_flags & AV_CPU_FLAG_MMX) { + c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; + c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; + } + if (mm_flags & AV_CPU_FLAG_MMX2) { + c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; + c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; + } else if (mm_flags & AV_CPU_FLAG_3DNOW) { + c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; + c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; + } +#endif +} |