diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-07-18 22:27:46 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-07-18 22:34:48 +0200 |
commit | 204c4e953d895e15ab0908d715fd46181bf32add (patch) | |
tree | aebdffe23aa1133eeb1db55b810135ecdfe1c188 /libavcodec | |
parent | c66978e29ad9cae2c71da83857c6a3cec11bb9cb (diff) | |
parent | ffdd93a25e64db82c053577f415ea82c54fd5235 (diff) | |
download | ffmpeg-204c4e953d895e15ab0908d715fd46181bf32add.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
ppc: fix build with altivec disabled
vp3: move idct and loop filter pointers to new vp3dsp context
build: add CONFIG_VP3DSP, reduce repetition in OBJS lists
tscc2: do not add/subtract 128 bias during DCT
tscc2: fix typo in DCT
configure: clarify external library section of help output
configure: mark libfdk-aac as nonfree
configure: cosmetics: drop some unnecessary backslashes
os_support: K&R formatting cosmetics
Conflicts:
configure
libavcodec/vp3.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/Makefile | 7 | ||||
-rw-r--r-- | libavcodec/arm/Makefile | 5 | ||||
-rw-r--r-- | libavcodec/arm/dsputil_init_neon.c | 21 | ||||
-rw-r--r-- | libavcodec/arm/vp3dsp_init_arm.c | 45 | ||||
-rw-r--r-- | libavcodec/arm/vp3dsp_neon.S | 26 | ||||
-rw-r--r-- | libavcodec/dsputil.c | 12 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 13 | ||||
-rw-r--r-- | libavcodec/ppc/Makefile | 5 | ||||
-rw-r--r-- | libavcodec/ppc/dsputil_altivec.h | 4 | ||||
-rw-r--r-- | libavcodec/ppc/dsputil_ppc.c | 6 | ||||
-rw-r--r-- | libavcodec/ppc/vp3dsp_altivec.c | 42 | ||||
-rw-r--r-- | libavcodec/tscc2.c | 6 | ||||
-rw-r--r-- | libavcodec/vp3.c | 22 | ||||
-rw-r--r-- | libavcodec/vp3dsp.c | 37 | ||||
-rw-r--r-- | libavcodec/vp3dsp.h | 40 | ||||
-rw-r--r-- | libavcodec/vp56.c | 10 | ||||
-rw-r--r-- | libavcodec/vp56.h | 2 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 7 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 35 | ||||
-rw-r--r-- | libavcodec/x86/vp3dsp.asm | 4 | ||||
-rw-r--r-- | libavcodec/x86/vp3dsp_init.c | 65 |
21 files changed, 232 insertions, 182 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index bbe188a7ef..45dee193f4 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -59,6 +59,7 @@ OBJS-$(CONFIG_SINEWIN) += sinewin.o OBJS-$(CONFIG_VAAPI) += vaapi.o OBJS-$(CONFIG_VDA) += vda.o OBJS-$(CONFIG_VDPAU) += vdpau.o +OBJS-$(CONFIG_VP3DSP) += vp3dsp.o # decoders/encoders/hardware accelerators OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o @@ -484,11 +485,11 @@ OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbis.o \ vorbis_data.o xiph.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ vorbis_data.o -OBJS-$(CONFIG_VP3_DECODER) += vp3.o vp3dsp.o +OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56dsp.o \ - vp3dsp.o vp56rac.o + vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \ - vp3dsp.o vp6dsp.o vp56rac.o + vp6dsp.o vp56rac.o OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56rac.o OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index d2bdd50daa..a8e531cf18 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -11,6 +11,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o +OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o @@ -75,13 +76,11 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_neon.o \ arm/rv40dsp_neon.o \ arm/h264cmc_neon.o \ -NEON-OBJS-$(CONFIG_VP3_DECODER) += arm/vp3dsp_neon.o +NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ - arm/vp3dsp_neon.o \ NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \ - arm/vp3dsp_neon.o \ NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_neon.o \ arm/vp8dsp_neon.o diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index ef5a8df85f..5533a28c42 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -29,11 +29,6 @@ void ff_simple_idct_neon(DCTELEM *data); void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); -void ff_vp3_idct_neon(DCTELEM *data); -void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); -void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); -void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data); - void ff_clear_block_neon(DCTELEM *block); void ff_clear_blocks_neon(DCTELEM *blocks); @@ -147,9 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); -void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); -void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); - void ff_vector_fmul_window_neon(float *dst, const float *src0, const float *src1, const float *win, int len); void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, @@ -186,13 +178,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->idct_add = ff_simple_idct_add_neon; c->idct = ff_simple_idct_neon; c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; - } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || - CONFIG_VP6_DECODER) && - avctx->idct_algo == FF_IDCT_VP3) { - c->idct_put = ff_vp3_idct_put_neon; - c->idct_add = ff_vp3_idct_add_neon; - c->idct = ff_vp3_idct_neon; - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } } @@ -319,12 +304,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; } - if (CONFIG_VP3_DECODER) { - c->vp3_v_loop_filter = ff_vp3_v_loop_filter_neon; - c->vp3_h_loop_filter = ff_vp3_h_loop_filter_neon; - c->vp3_idct_dc_add = ff_vp3_idct_dc_add_neon; - } - c->vector_fmul_window = ff_vector_fmul_window_neon; c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; c->butterflies_float = ff_butterflies_float_neon; diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c new file mode 100644 index 0000000000..90fc34b218 --- /dev/null +++ b/libavcodec/arm/vp3dsp_init_arm.c @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/vp3dsp.h" + +void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); +void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); +void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data); + +void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); +void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); + +av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + c->idct_put = ff_vp3_idct_put_neon; + c->idct_add = ff_vp3_idct_add_neon; + c->idct_dc_add = ff_vp3_idct_dc_add_neon; + c->v_loop_filter = ff_vp3_v_loop_filter_neon; + c->h_loop_filter = ff_vp3_h_loop_filter_neon; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; + } +} diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S index 70cfd29121..0c88562b45 100644 --- a/libavcodec/arm/vp3dsp_neon.S +++ b/libavcodec/arm/vp3dsp_neon.S @@ -260,32 +260,6 @@ endfunc VP3_IDCT_END row VP3_IDCT_END col -function ff_vp3_idct_neon, export=1 - mov ip, lr - mov r2, r0 - bl vp3_idct_start_neon - bl vp3_idct_end_row_neon - mov r3, #8 - bl vp3_idct_core_neon - bl vp3_idct_end_col_neon - mov lr, ip - vpop {d8-d15} - - vshr.s16 q8, q8, #4 - vshr.s16 q9, q9, #4 - vshr.s16 q10, q10, #4 - vshr.s16 q11, q11, #4 - vshr.s16 q12, q12, #4 - vst1.64 {d16-d19}, [r0,:128]! - vshr.s16 q13, q13, #4 - vshr.s16 q14, q14, #4 - vst1.64 {d20-d23}, [r0,:128]! - vshr.s16 q15, q15, #4 - vst1.64 {d24-d27}, [r0,:128]! - vst1.64 {d28-d31}, [r0,:128]! - bx lr -endfunc - function ff_vp3_idct_put_neon, export=1 mov ip, lr bl vp3_idct_start_neon diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 357792693e..ef3143132e 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2870,12 +2870,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct_add= ff_jref_idct_add; c->idct = ff_j_rev_dct; c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && - avctx->idct_algo==FF_IDCT_VP3){ - c->idct_put= ff_vp3_idct_put_c; - c->idct_add= ff_vp3_idct_add_c; - c->idct = ff_vp3_idct_c; - c->idct_permutation_type= FF_NO_IDCT_PERM; }else if(avctx->idct_algo==FF_IDCT_WMV2){ c->idct_put= ff_wmv2_idct_put_c; c->idct_add= ff_wmv2_idct_add_c; @@ -3037,12 +3031,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h263_v_loop_filter= h263_v_loop_filter_c; } - if (CONFIG_VP3_DECODER) { - c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; - c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; - c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; - } - c->h261_loop_filter= h261_loop_filter_c; c->try_8x8basis= try_8x8basis_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index af1104825f..7b533cc0ab 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -108,15 +108,6 @@ PUTAVG_PIXELS(14) #define ff_put_pixels16x16_c ff_put_pixels16x16_8_c #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c -/* VP3 DSP functions */ -void ff_vp3_idct_c(DCTELEM *block/* align 16*/); -void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); - -void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); -void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); - /* EA functions */ void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); @@ -401,10 +392,6 @@ typedef struct DSPContext { void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); - void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/); - void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); - void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); - /* assume len is a multiple of 4, and arrays are 16-byte aligned */ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 31f4fb8ecd..e5d1d39d43 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -1,14 +1,13 @@ OBJS += ppc/dsputil_ppc.o \ +OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o + FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o ALTIVEC-OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o \ $(FFT-OBJS-yes) ALTIVEC-OBJS-$(CONFIG_H264DSP) += ppc/h264_altivec.o ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodec_altivec.o ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_VP3_DECODER) += ppc/vp3dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o -ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o ALTIVEC-OBJS += ppc/dsputil_altivec.o \ diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 6c87782e6d..0b5e404225 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -36,10 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); -void ff_vp3_idct_altivec(DCTELEM *block); -void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block); -void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); - void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 195aa20906..c6fdc8e592 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -193,12 +193,6 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) c->idct_put = ff_idct_put_altivec; c->idct_add = ff_idct_add_altivec; c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; - }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && - avctx->idct_algo==FF_IDCT_VP3){ - c->idct_put = ff_vp3_idct_put_altivec; - c->idct_add = ff_vp3_idct_add_altivec; - c->idct = ff_vp3_idct_altivec; - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } } diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c index 950e5c7419..ac00c933bb 100644 --- a/libavcodec/ppc/vp3dsp_altivec.c +++ b/libavcodec/ppc/vp3dsp_altivec.c @@ -18,6 +18,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/vp3dsp.h" + +#if HAVE_ALTIVEC + #include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/util_altivec.h" #include "libavcodec/dsputil.h" @@ -107,25 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C) #define ADD8(a) vec_add(a, eight) #define SHIFT4(a) vec_sra(a, four) -void ff_vp3_idct_altivec(DCTELEM block[64]) -{ - IDCT_START - - IDCT_1D(NOP, NOP) - TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7); - IDCT_1D(ADD8, SHIFT4) - - vec_st(b0, 0x00, block); - vec_st(b1, 0x10, block); - vec_st(b2, 0x20, block); - vec_st(b3, 0x30, block); - vec_st(b4, 0x40, block); - vec_st(b5, 0x50, block); - vec_st(b6, 0x60, block); - vec_st(b7, 0x70, block); -} - -void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) +static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) { vec_u8 t; IDCT_START @@ -153,7 +142,7 @@ void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64]) PUT(b7) } -void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) +static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) { LOAD_ZERO; vec_u8 t, vdst; @@ -183,3 +172,16 @@ void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64]) ADD(b6) dst += stride; ADD(b7) } + +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags) +{ +#if HAVE_ALTIVEC + if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { + c->idct_put = vp3_idct_put_altivec; + c->idct_add = vp3_idct_add_altivec; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; + } +#endif +} diff --git a/libavcodec/tscc2.c b/libavcodec/tscc2.c index 9c8d2341ce..a8fd652e59 100644 --- a/libavcodec/tscc2.c +++ b/libavcodec/tscc2.c @@ -91,11 +91,11 @@ static av_cold int init_vlcs(TSCC2Context *c) #define DCT1D(d0, d1, d2, d3, s0, s1, s2, s3, OP) \ OP(d0, 5 * ((s0) + (s1) + (s2)) + 2 * (s3)); \ OP(d1, 5 * ((s0) - (s2) - (s3)) + 2 * (s1)); \ - OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s3)); \ + OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s1)); \ OP(d3, 5 * ((s0) - (s1) + (s2)) - 2 * (s3)); \ #define COL_OP(a, b) a = b -#define ROW_OP(a, b) a = (((b) + 0x20) >> 6) + 0x80 +#define ROW_OP(a, b) a = ((b) + 0x20) >> 6 static void tscc2_idct4_put(int *in, int q[3], uint8_t *dst, int stride) { @@ -158,7 +158,7 @@ static int tscc2_decode_mb(TSCC2Context *c, int *q, int vlc_set, } dc = (dc + prev_dc) & 0xFF; prev_dc = dc; - c->block[0] = dc - 0x80; + c->block[0] = dc; nc = get_vlc2(gb, c->nc_vlc[vlc_set].table, 9, 1); if (nc == -1) diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index 81f6b8925c..a3bfd74666 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -40,6 +40,7 @@ #include "get_bits.h" #include "vp3data.h" +#include "vp3dsp.h" #include "xiph.h" #include "thread.h" @@ -135,6 +136,7 @@ typedef struct Vp3DecodeContext { AVFrame current_frame; int keyframe; DSPContext dsp; + VP3DSPContext vp3dsp; int flipped_image; int last_slice_end; int skip_loop_filter; @@ -1302,14 +1304,14 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye { /* do not perform left edge filter for left columns frags */ if (x > 0) { - s->dsp.vp3_h_loop_filter( + s->vp3dsp.h_loop_filter( plane_data + 8*x, stride, bounding_values); } /* do not perform top edge filter for top row fragments */ if (y > 0) { - s->dsp.vp3_v_loop_filter( + s->vp3dsp.v_loop_filter( plane_data + 8*x, stride, bounding_values); } @@ -1319,7 +1321,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye * in this frame (it will be filtered in next iteration) */ if ((x < width - 1) && (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { - s->dsp.vp3_h_loop_filter( + s->vp3dsp.h_loop_filter( plane_data + 8*x + 8, stride, bounding_values); } @@ -1329,7 +1331,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye * in this frame (it will be filtered in the next row) */ if ((y < height - 1) && (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { - s->dsp.vp3_v_loop_filter( + s->vp3dsp.v_loop_filter( plane_data + 8*x + 8*stride, stride, bounding_values); } @@ -1574,20 +1576,18 @@ static void render_slice(Vp3DecodeContext *s, int slice) if (s->all_fragments[i].coding_method == MODE_INTRA) { vp3_dequant(s, s->all_fragments + i, plane, 0, block); - if(s->avctx->idct_algo!=FF_IDCT_VP3) - block[0] += 128<<3; - s->dsp.idct_put( + s->vp3dsp.idct_put( output_plane + first_pixel, stride, block); } else { if (vp3_dequant(s, s->all_fragments + i, plane, 1, block)) { - s->dsp.idct_add( + s->vp3dsp.idct_add( output_plane + first_pixel, stride, block); } else { - s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block); + s->vp3dsp.idct_dc_add(output_plane + first_pixel, stride, block); } } } else { @@ -1670,10 +1670,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx) if (avctx->codec_id != CODEC_ID_THEORA) avctx->pix_fmt = PIX_FMT_YUV420P; avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; - if(avctx->idct_algo==FF_IDCT_AUTO) - avctx->idct_algo=FF_IDCT_VP3; ff_dsputil_init(&s->dsp, avctx); + ff_vp3dsp_init(&s->vp3dsp, avctx->flags); + ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); /* initialize to an impossible value which will force a recalculation diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index 9fded0f53e..4c5ff6a448 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -24,8 +24,10 @@ * source code. */ +#include "libavutil/attributes.h" #include "avcodec.h" #include "dsputil.h" +#include "vp3dsp.h" #define IdctAdjustBeforeShift 8 #define xC1S7 64277 @@ -210,19 +212,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int } } -void ff_vp3_idct_c(DCTELEM *block/* align 16*/){ - idct(NULL, 0, block, 0); -} - -void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ +static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ idct(dest, line_size, block, 1); } -void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ +static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){ idct(dest, line_size, block, 2); } -void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){ +static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, + const DCTELEM *block/*align 16*/){ int i, dc = (block[0] + 15) >> 5; for(i = 0; i < 8; i++){ @@ -238,7 +237,8 @@ void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM } } -void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values) +static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, + int *bounding_values) { unsigned char *end; int filter_value; @@ -254,7 +254,8 @@ void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu } } -void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values) +static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, + int *bounding_values) { unsigned char *end; int filter_value; @@ -268,3 +269,21 @@ void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); } } + +av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) +{ + c->idct_put = vp3_idct_put_c; + c->idct_add = vp3_idct_add_c; + c->idct_dc_add = vp3_idct_dc_add_c; + c->v_loop_filter = vp3_v_loop_filter_c; + c->h_loop_filter = vp3_h_loop_filter_c; + + c->idct_perm = FF_NO_IDCT_PERM; + + if (ARCH_ARM) + ff_vp3dsp_init_arm(c, flags); + if (ARCH_PPC) + ff_vp3dsp_init_ppc(c, flags); + if (ARCH_X86) + ff_vp3dsp_init_x86(c, flags); +} diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h new file mode 100644 index 0000000000..a14dec1eb4 --- /dev/null +++ b/libavcodec/vp3dsp.h @@ -0,0 +1,40 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VP3DSP_H +#define AVCODEC_VP3DSP_H + +#include <stdint.h> +#include "dsputil.h" + +typedef struct VP3DSPContext { + void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); + void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block); + void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block); + void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values); + void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values); + + int idct_perm; +} VP3DSPContext; + +void ff_vp3dsp_init(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags); +void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags); + +#endif /* AVCODEC_VP3DSP_H */ diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c index c6e32af867..f98d22c1cb 100644 --- a/libavcodec/vp56.c +++ b/libavcodec/vp56.c @@ -411,7 +411,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) case VP56_MB_INTRA: for (b=0; b<b_max; b++) { plane = ff_vp56_b2p[b+ab]; - s->dsp.idct_put(frame_current->data[plane] + s->block_offset[b], + s->vp3dsp.idct_put(frame_current->data[plane] + s->block_offset[b], s->stride[plane], s->block_coeff[b]); } break; @@ -424,7 +424,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) s->dsp.put_pixels_tab[1][0](frame_current->data[plane] + off, frame_ref->data[plane] + off, s->stride[plane], 8); - s->dsp.idct_add(frame_current->data[plane] + off, + s->vp3dsp.idct_add(frame_current->data[plane] + off, s->stride[plane], s->block_coeff[b]); } break; @@ -442,7 +442,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) plane = ff_vp56_b2p[b+ab]; vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane], 16*col+x_off, 16*row+y_off); - s->dsp.idct_add(frame_current->data[plane] + s->block_offset[b], + s->vp3dsp.idct_add(frame_current->data[plane] + s->block_offset[b], s->stride[plane], s->block_coeff[b]); } break; @@ -666,10 +666,10 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha) s->avctx = avctx; avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P; - if (avctx->idct_algo == FF_IDCT_AUTO) - avctx->idct_algo = FF_IDCT_VP3; ff_dsputil_init(&s->dsp, avctx); + ff_vp3dsp_init(&s->vp3dsp, avctx->flags); ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id); + ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm); ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct); for (i=0; i<4; i++) { diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h index e135718d20..12f9380b4d 100644 --- a/libavcodec/vp56.h +++ b/libavcodec/vp56.h @@ -30,6 +30,7 @@ #include "dsputil.h" #include "get_bits.h" #include "bytestream.h" +#include "vp3dsp.h" #include "vp56dsp.h" typedef struct vp56_context VP56Context; @@ -91,6 +92,7 @@ typedef struct { struct vp56_context { AVCodecContext *avctx; DSPContext dsp; + VP3DSPContext vp3dsp; VP56DSPContext vp56dsp; ScanTable scantable; AVFrame frames[4]; diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 8acbd0774c..e18074a300 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -1,5 +1,6 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o +OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o MMX-OBJS += x86/dsputil_mmx.o \ @@ -67,10 +68,8 @@ YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \ x86/rv40dsp.o YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o -YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o -YASM-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp.o -YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp.o \ - x86/vp56dsp.o +YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o +YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o YASM-OBJS += x86/dsputil_yasm.o \ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index e829bbd8b6..8049bdea72 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2574,20 +2574,6 @@ static void vector_clipf_sse(float *dst, const float *src, ); } -void ff_vp3_idct_mmx(int16_t *input_data); -void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block); -void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); - -void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, - const DCTELEM *block); - -void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); -void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); - -void ff_vp3_idct_sse2(int16_t *input_data); -void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); -void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); - int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order); int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, @@ -2782,14 +2768,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; } - - if (CONFIG_VP3_DECODER && HAVE_YASM) { - c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2; - c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2; - } } - if (CONFIG_VP3_DECODER && HAVE_YASM) - c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { @@ -3165,20 +3144,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) } c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; #endif - } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || - CONFIG_VP6_DECODER) && - idct_algo == FF_IDCT_VP3 && HAVE_YASM) { - if (mm_flags & AV_CPU_FLAG_SSE2) { - c->idct_put = ff_vp3_idct_put_sse2; - c->idct_add = ff_vp3_idct_add_sse2; - c->idct = ff_vp3_idct_sse2; - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; - } else { - c->idct_put = ff_vp3_idct_put_mmx; - c->idct_add = ff_vp3_idct_add_mmx; - c->idct = ff_vp3_idct_mmx; - c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; - } } else if (idct_algo == FF_IDCT_CAVS) { c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } else if (idct_algo == FF_IDCT_XVIDMMX) { diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index 99621fb062..0e0bd29a99 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -524,10 +524,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 %endmacro %macro vp3_idct_funcs 3 -cglobal vp3_idct_%1, 1, 1, %2 - VP3_IDCT_%1 r0 - RET - cglobal vp3_idct_put_%1, 3, %3, %2 VP3_IDCT_%1 r2 %if ARCH_X86_64 diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c new file mode 100644 index 0000000000..3ae2a90e57 --- /dev/null +++ b/libavcodec/x86/vp3dsp_init.c @@ -0,0 +1,65 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/vp3dsp.h" +#include "config.h" + +void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block); +void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); + +void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); +void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); + +void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, + const DCTELEM *block); + +void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); +void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); + +av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) +{ +#if HAVE_YASM + int cpuflags = av_get_cpu_flags(); + + if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { + c->idct_put = ff_vp3_idct_put_mmx; + c->idct_add = ff_vp3_idct_add_mmx; + c->idct_perm = FF_PARTTRANS_IDCT_PERM; + } + + if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) { + c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; + + if (!(flags & CODEC_FLAG_BITEXACT)) { + c->v_loop_filter = ff_vp3_v_loop_filter_mmx2; + c->h_loop_filter = ff_vp3_h_loop_filter_mmx2; + } + } + + if (cpuflags & AV_CPU_FLAG_SSE2) { + c->idct_put = ff_vp3_idct_put_sse2; + c->idct_add = ff_vp3_idct_add_sse2; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; + } +#endif +} |