diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-05-16 22:29:04 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-05-16 22:32:05 +0200 |
commit | 703e920bb75053bf6b87d41d198cbbfbce3fb7ad (patch) | |
tree | 97b86b3ae104e48e6098fc0c1c243e096e4cdd32 /libavcodec | |
parent | 4d3787763218de4c3e838b31f0fb2153b8d6af20 (diff) | |
parent | 6ce51a9b392aaf647c4cdfd149fff1af5259a6fd (diff) | |
download | ffmpeg-703e920bb75053bf6b87d41d198cbbfbce3fb7ad.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
fate: Work around non-standard wc implementations at more places
fate: work around non-standard wc implementations
x86: rv40: Mark rv40_weight functions as MMX2; they use MMX2 instructions.
ac3dsp: simplify x86 versions of ac3_max_msb_abs_int16
fate: use standard diff options
tta: Fix comment about channel number; TTA supports >2 channels.
avfilter: Move ff_get_ref_perms_string() to where it is used.
build: Add 'check' target to run all compile and test targets.
indeo3: validate new frame size before resetting decoder
indeo3: when freeing buffers, set pointers referencing them to NULL as well
indeo3: initialise pixel planes on allocation
indeo3: ensure that decoded cell data is in 7-bit range as presumed by decoder
fate: rename psx-str-v3-mdec to mdec-v3
fate: convert psx-str to a demuxer test
lavf: add mdec to is_intra_only() list
Conflicts:
doc/developer.texi
libavcodec/indeo3.c
libavfilter/video.c
libavformat/utils.c
tests/fate/demux.mak
tests/fate/video.mak
tests/lavf-regression.sh
tests/ref/vsynth1/cljr
tests/ref/vsynth1/ffvhuff
tests/ref/vsynth2/cljr
tests/ref/vsynth2/ffvhuff
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/indeo3.c | 40 | ||||
-rw-r--r-- | libavcodec/tta.c | 2 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 59 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp_mmx.c | 10 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp.asm | 2 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp_init.c | 10 |
6 files changed, 76 insertions, 47 deletions
diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c index f27e27de70..83eef6479b 100644 --- a/libavcodec/indeo3.c +++ b/libavcodec/indeo3.c @@ -194,6 +194,8 @@ static av_cold int allocate_frame_buffers(Indeo3DecodeContext *ctx, /* set buffer pointers = buf_ptr + pitch and thus skip the INTRA prediction line */ ctx->planes[p].pixels[0] = ctx->planes[p].buffers[0] + ctx->planes[p].pitch; ctx->planes[p].pixels[1] = ctx->planes[p].buffers[1] + ctx->planes[p].pitch; + memset(ctx->planes[p].pixels[0], 0, ctx->planes[p].pitch * ctx->planes[p].height); + memset(ctx->planes[p].pixels[1], 0, ctx->planes[p].pitch * ctx->planes[p].height); } return 0; @@ -210,6 +212,7 @@ static av_cold void free_frame_buffers(Indeo3DecodeContext *ctx) for (p = 0; p < 3; p++) { av_freep(&ctx->planes[p].buffers[0]); av_freep(&ctx->planes[p].buffers[1]); + ctx->planes[p].pixels[0] = ctx->planes[p].pixels[1] = 0; } } @@ -350,8 +353,10 @@ if (*data_ptr >= last_ptr) \ fill_64(dst, pix64, num_lines << 1, row_offset) #define APPLY_DELTA_4 \ - AV_WN16A(dst + line_offset , AV_RN16A(ref ) + delta_tab->deltas[dyad1]);\ - AV_WN16A(dst + line_offset + 2, AV_RN16A(ref + 2) + delta_tab->deltas[dyad2]);\ + AV_WN16A(dst + line_offset ,\ + (AV_RN16A(ref ) + delta_tab->deltas[dyad1]) & 0x7F7F);\ + AV_WN16A(dst + line_offset + 2,\ + (AV_RN16A(ref + 2) + delta_tab->deltas[dyad2]) & 0x7F7F);\ if (mode >= 3) {\ if (is_top_of_cell && !cell->ypos) {\ AV_COPY32(dst, dst + row_offset);\ @@ -364,14 +369,14 @@ if (*data_ptr >= last_ptr) \ /* apply two 32-bit VQ deltas to next even line */\ if (is_top_of_cell) { \ AV_WN32A(dst + row_offset , \ - replicate32(AV_RN32A(ref )) + delta_tab->deltas_m10[dyad1]);\ + (replicate32(AV_RN32A(ref )) + delta_tab->deltas_m10[dyad1]) & 0x7F7F7F7F);\ AV_WN32A(dst + row_offset + 4, \ - replicate32(AV_RN32A(ref + 4)) + delta_tab->deltas_m10[dyad2]);\ + (replicate32(AV_RN32A(ref + 4)) + delta_tab->deltas_m10[dyad2]) & 0x7F7F7F7F);\ } else { \ AV_WN32A(dst + row_offset , \ - AV_RN32A(ref ) + delta_tab->deltas_m10[dyad1]);\ + (AV_RN32A(ref ) + delta_tab->deltas_m10[dyad1]) & 0x7F7F7F7F);\ AV_WN32A(dst + row_offset + 4, \ - AV_RN32A(ref + 4) + delta_tab->deltas_m10[dyad2]);\ + (AV_RN32A(ref + 4) + delta_tab->deltas_m10[dyad2]) & 0x7F7F7F7F);\ } \ /* odd lines are not coded but rather interpolated/replicated */\ /* first line of the cell on the top of image? - replicate */\ @@ -385,22 +390,22 @@ if (*data_ptr >= last_ptr) \ #define APPLY_DELTA_1011_INTER \ if (mode == 10) { \ AV_WN32A(dst , \ - AV_RN32A(dst ) + delta_tab->deltas_m10[dyad1]);\ + (AV_RN32A(dst ) + delta_tab->deltas_m10[dyad1]) & 0x7F7F7F7F);\ AV_WN32A(dst + 4 , \ - AV_RN32A(dst + 4 ) + delta_tab->deltas_m10[dyad2]);\ + (AV_RN32A(dst + 4 ) + delta_tab->deltas_m10[dyad2]) & 0x7F7F7F7F);\ AV_WN32A(dst + row_offset , \ - AV_RN32A(dst + row_offset ) + delta_tab->deltas_m10[dyad1]);\ + (AV_RN32A(dst + row_offset ) + delta_tab->deltas_m10[dyad1]) & 0x7F7F7F7F);\ AV_WN32A(dst + row_offset + 4, \ - AV_RN32A(dst + row_offset + 4) + delta_tab->deltas_m10[dyad2]);\ + (AV_RN32A(dst + row_offset + 4) + delta_tab->deltas_m10[dyad2]) & 0x7F7F7F7F);\ } else { \ AV_WN16A(dst , \ - AV_RN16A(dst ) + delta_tab->deltas[dyad1]);\ + (AV_RN16A(dst ) + delta_tab->deltas[dyad1]) & 0x7F7F);\ AV_WN16A(dst + 2 , \ - AV_RN16A(dst + 2 ) + delta_tab->deltas[dyad2]);\ + (AV_RN16A(dst + 2 ) + delta_tab->deltas[dyad2]) & 0x7F7F);\ AV_WN16A(dst + row_offset , \ - AV_RN16A(dst + row_offset ) + delta_tab->deltas[dyad1]);\ + (AV_RN16A(dst + row_offset ) + delta_tab->deltas[dyad1]) & 0x7F7F);\ AV_WN16A(dst + row_offset + 2, \ - AV_RN16A(dst + row_offset + 2) + delta_tab->deltas[dyad2]);\ + (AV_RN16A(dst + row_offset + 2) + delta_tab->deltas[dyad2]) & 0x7F7F);\ } @@ -931,6 +936,13 @@ static int decode_frame_headers(Indeo3DecodeContext *ctx, AVCodecContext *avctx, av_dlog(avctx, "Frame dimensions changed!\n"); + if (width < 16 || width > 640 || + height < 16 || height > 480 || + width & 3 || height & 3) { + av_log(avctx, AV_LOG_ERROR, + "Invalid picture dimensions: %d x %d!\n", width, height); + return AVERROR_INVALIDDATA; + } free_frame_buffers(ctx); if ((res = allocate_frame_buffers(ctx, avctx, width, height)) < 0) return res; diff --git a/libavcodec/tta.c b/libavcodec/tta.c index 03fa12d5dc..84fa632960 100644 --- a/libavcodec/tta.c +++ b/libavcodec/tta.c @@ -402,7 +402,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data, if (cur_chan < (s->channels-1)) cur_chan++; else { - // decorrelate in case of stereo integer + // decorrelate in case of multiple channels if (s->channels > 1) { int32_t *r = p - 1; for (*p += *r / 2; r > p - s->channels; r--) diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 99e121ec68..4e0fce35d1 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -91,12 +91,36 @@ AC3_EXPONENT_MIN sse2 ; This is used for mmxext and sse2 because they have pminsw/pmaxsw. ;----------------------------------------------------------------------------- -%macro AC3_MAX_MSB_ABS_INT16 2 -cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len +; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word +%macro OR_WORDS_HORIZ 2 ; src, tmp +%if cpuflag(sse2) + movhlps %2, %1 + por %1, %2 + pshuflw %2, %1, q0032 + por %1, %2 + pshuflw %2, %1, q0001 + por %1, %2 +%elif cpuflag(mmx2) + pshufw %2, %1, q0032 + por %1, %2 + pshufw %2, %1, q0001 + por %1, %2 +%else ; mmx + movq %2, %1 + psrlq %2, 32 + por %1, %2 + movq %2, %1 + psrlq %2, 16 + por %1, %2 +%endif +%endmacro + +%macro AC3_MAX_MSB_ABS_INT16 1 +cglobal ac3_max_msb_abs_int16, 2,2,5, src, len pxor m2, m2 pxor m3, m3 .loop: -%ifidn %2, min_max +%ifidn %1, min_max mova m0, [srcq] mova m1, [srcq+mmsize] pminsw m2, m0 @@ -104,7 +128,7 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len pmaxsw m3, m0 pmaxsw m3, m1 %else ; or_abs -%ifidn %1, mmx +%if notcpuflag(ssse3) mova m0, [srcq] mova m1, [srcq+mmsize] ABS2 m0, m1, m3, m4 @@ -119,34 +143,27 @@ cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len add srcq, mmsize*2 sub lend, mmsize ja .loop -%ifidn %2, min_max +%ifidn %1, min_max ABS2 m2, m3, m0, m1 por m2, m3 %endif -%ifidn mmsize, 16 - movhlps m0, m2 - por m2, m0 -%endif - PSHUFLW m0, m2, 0xe - por m2, m0 - PSHUFLW m0, m2, 0x1 - por m2, m0 + OR_WORDS_HORIZ m2, m0 movd eax, m2 and eax, 0xFFFF RET %endmacro -INIT_MMX +INIT_MMX mmx %define ABS2 ABS2_MMX -%define PSHUFLW pshufw -AC3_MAX_MSB_ABS_INT16 mmx, or_abs +AC3_MAX_MSB_ABS_INT16 or_abs +INIT_MMX mmx2 %define ABS2 ABS2_MMX2 -AC3_MAX_MSB_ABS_INT16 mmxext, min_max -INIT_XMM -%define PSHUFLW pshuflw -AC3_MAX_MSB_ABS_INT16 sse2, min_max +AC3_MAX_MSB_ABS_INT16 min_max +INIT_XMM sse2 +AC3_MAX_MSB_ABS_INT16 min_max +INIT_XMM ssse3 %define ABS2 ABS2_SSSE3 -AC3_MAX_MSB_ABS_INT16 ssse3, or_abs +AC3_MAX_MSB_ABS_INT16 or_abs ;----------------------------------------------------------------------------- ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c index 9578e98d8b..54fa380130 100644 --- a/libavcodec/x86/ac3dsp_mmx.c +++ b/libavcodec/x86/ac3dsp_mmx.c @@ -27,10 +27,10 @@ extern void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int n extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs); extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs); -extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len); -extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len); -extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len); -extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len); +extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len); +extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len); +extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len); +extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len); extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift); extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift); @@ -67,7 +67,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) } if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) { c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; - c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext; + c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; } if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { c->float_to_fixed24 = ff_float_to_fixed24_sse; diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index 9b50940d4d..70c0c0400f 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -487,7 +487,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8 REP_RET %endmacro -INIT_MMX mmx +INIT_MMX mmx2 RV40_WEIGHT rnd, 8, 3 RV40_WEIGHT rnd, 16, 4 RV40_WEIGHT nornd, 8, 3 diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index 3f42363e4e..cc1ea45e88 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -52,7 +52,7 @@ void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *sr int w1, int w2, ptrdiff_t stride); \ void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \ int w1, int w2, ptrdiff_t stride); -DECLARE_WEIGHT(mmx) +DECLARE_WEIGHT(mmx2) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) @@ -190,10 +190,6 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) if (mm_flags & AV_CPU_FLAG_MMX) { c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; - c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx; - c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx; - c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx; - c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx; c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx; c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx; c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx; @@ -205,6 +201,10 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) if (mm_flags & AV_CPU_FLAG_MMX2) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; + c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; + c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx2; + c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx2; + c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx2; #if ARCH_X86_32 QPEL_MC_SET(avg_, _mmx2) #endif |