diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-04-05 22:26:50 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-04-05 22:26:50 +0200 |
commit | 2c5a2958e961c25434a54c832a04139525f661da (patch) | |
tree | 7fba548115bae83f10089b8c9b5ff77ee9f14224 /libavcodec | |
parent | 3e4b5e68c1589590736fce62c0e677c4632f965b (diff) | |
parent | 0becb07842b57ea225ddf0726de33b5f8e669297 (diff) | |
download | ffmpeg-2c5a2958e961c25434a54c832a04139525f661da.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
h264: Factorize declaration of mb_sizes array.
vsrc_buffer: when no frame is available, return an error instead of segfaulting.
configure: add dl to frei0r extralibs.
dsputil x86: use SSE float instruction instead of SSE2 integer equivalent
dsputil x86: remove deprecated parameter from scalarproduct_int16 prototype
vp8dsp x86: perform rounding shift with a single instruction
fate: add BMP tests.
swscale: handle complete dimensions for monoblack/white.
aacenc: Mark deinterleave_input_samples argument as const.
vf_unsharp: Mark readonly variable as const.
h264: fix 4:2:2 PCM-macroblocks decoding
Conflicts:
configure
libavcodec/h264.h
libavcodec/x86/dsputil_mmx.c
libavfilter/vf_unsharp.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/aacenc.c | 2 |
-rw-r--r-- | libavcodec/h264.c | 16 |
-rw-r--r-- | libavcodec/h264.h | 1 |
-rw-r--r-- | libavcodec/h264_cabac.c | 4 |
-rw-r--r-- | libavcodec/h264_cavlc.c | 4 |
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 4 |
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 18 |
7 files changed, 27 insertions, 22 deletions
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index fd72dafd17..3ac2ad890d 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -477,7 +477,7 @@ static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, * Deinterleave input samples. * Channels are reordered from libavcodec's default order to AAC order. */ -static void deinterleave_input_samples(AACEncContext *s, AVFrame *frame) +static void deinterleave_input_samples(AACEncContext *s, const AVFrame *frame) { int ch, i; const int sinc = s->channels; diff --git a/libavcodec/h264.c b/libavcodec/h264.c index fc612905ab..b909600b3e 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -48,6 +48,8 @@ // #undef NDEBUG #include <assert.h> +const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 }; + static const uint8_t rem6[QP_MAX_NUM + 1] = { 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, @@ -2126,7 +2128,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, if (pixel_shift) { int j; GetBitContext gb; - init_get_bits(&gb, (uint8_t *)h->mb, 384 * bit_depth); + init_get_bits(&gb, (uint8_t *)h->mb, + ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth); for (i = 0; i < 16; i++) { uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize); @@ -2157,7 +2160,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, } } else { for (i = 0; i < 16; i++) - memcpy(dest_y + i * linesize, h->mb + i * 8, 16); + memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16); if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) { if (!h->sps.chroma_format_idc) { for (i = 0; i < 8; i++) { @@ -2165,9 +2168,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8); } } else { + uint8_t *src_cb = (uint8_t *)h->mb + 256; + uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8; for (i = 
0; i < block_h; i++) { - memcpy(dest_cb + i * uvlinesize, h->mb + 128 + i * 4, 8); - memcpy(dest_cr + i * uvlinesize, h->mb + 160 + i * 4, 8); + memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8); + memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8); } } } @@ -2358,7 +2363,8 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, } else { for (p = 0; p < plane_count; p++) for (i = 0; i < 16; i++) - memcpy(dest[p] + i * linesize, h->mb + p * 128 + i * 8, 16); + memcpy(dest[p] + i * linesize, + (uint8_t *)h->mb + p * 256 + i * 16, 16); } } else { if (IS_INTRA(mb_type)) { diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 153d83c695..f5542bfb1d 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -602,6 +602,7 @@ typedef struct H264Context{ extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM+1]; ///< One chroma qp table for each possible bit depth (8-12). +extern const uint16_t ff_h264_mb_sizes[4]; /** * Decode SEI diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 2c83348a5f..65f2cb4dc8 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -1984,8 +1984,8 @@ decode_intra_mb: h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)) { - static const uint16_t mb_sizes[4] = {256,384,512,768}; - const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3; + const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] * + h->sps.bit_depth_luma >> 3; const uint8_t *ptr; // We assume these blocks are very rare so we do not optimize it. 
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index baddd36800..a4debade2e 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -765,8 +765,8 @@ decode_intra_mb: if(IS_INTRA_PCM(mb_type)){ unsigned int x; - static const uint16_t mb_sizes[4] = {256,384,512,768}; - const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3; + const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] * + h->sps.bit_depth_luma >> 3; // We assume these blocks are very rare so we do not optimize it. align_get_bits(&s->gb); diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3d85bcf132..e17a8005a7 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2655,9 +2655,9 @@ void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, - int len); + int order); int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, - int len); + int order); int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 87e05c4ddf..42bb479b9b 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -144,6 +144,8 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 +pw_256: times 8 dw 256 + pw_20091: times 4 dw 20091 pw_17734: times 4 dw 17734 @@ -205,8 +207,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h pmaddubsw m2, m7 paddsw m0, m1 paddsw m0, m2 - paddsw m0, [pw_64] - psraw m0, 7 + pmulhrsw m0, [pw_256] packuswb m0, m0 movh [dstq], m0 ; store @@ -219,7 +220,7 @@ cglobal put_vp8_epel%1_h6, 
6, 6 + npicregs, 8, dst, dststride, src, srcstride, h cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 - mova m2, [pw_64] + mova m2, [pw_256] mova m3, [filter_h2_shuf] mova m4, [filter_h4_shuf] %ifdef PIC @@ -235,9 +236,8 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h pshufb m1, m4 pmaddubsw m0, m5 pmaddubsw m1, m6 - paddsw m0, m2 paddsw m0, m1 - psraw m0, 7 + pmulhrsw m0, m2 packuswb m0, m0 movh [dstq], m0 ; store @@ -255,7 +255,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr %endif mova m5, [fourtap_filter_hb+myq-16] mova m6, [fourtap_filter_hb+myq] - mova m7, [pw_64] + mova m7, [pw_256] ; read 3 lines sub srcq, srcstrideq @@ -275,8 +275,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr pmaddubsw m2, m6 paddsw m4, m2 mova m2, m3 - paddsw m4, m7 - psraw m4, 7 + pmulhrsw m4, m7 packuswb m4, m4 movh [dstq], m4 @@ -319,9 +318,8 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr paddsw m6, m1 paddsw m6, m7 mova m1, m2 - paddsw m6, [pw_64] mova m2, m3 - psraw m6, 7 + pmulhrsw m6, [pw_256] mova m3, m4 packuswb m6, m6 mova m4, m5 |