diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-26 14:33:06 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-26 14:33:06 +0100 |
commit | 446d62f0cfea35ad1695f756b5275288498e51e1 (patch) | |
tree | ca3a8425407f0b2f1fff00e4575f7e82b6ac3708 | |
parent | d235d240d8ba88b931cb9ca0aca2cfa586c7eea0 (diff) | |
parent | 69c25c9284645cf5189af2ede42d6f53828f3b45 (diff) | |
download | ffmpeg-446d62f0cfea35ad1695f756b5275288498e51e1.tar.gz |
Merge commit '69c25c9284645cf5189af2ede42d6f53828f3b45'
* commit '69c25c9284645cf5189af2ede42d6f53828f3b45':
dnxhdenc: fix invalid reads in dnxhd_mb_var_thread().
x86: h264qpel: Move stray comment to the right spot and clarify it
atrac3: use correct loop variable in add_tonal_components()
Conflicts:
tests/ref/vsynth/vsynth1-dnxhd-1080i
tests/ref/vsynth/vsynth2-dnxhd-1080i
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/dnxhdenc.c | 27 |
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 2 |
-rw-r--r-- | libavcodec/x86/h264_qpel.c | 4 |
-rw-r--r-- | tests/ref/vsynth/vsynth1-dnxhd-1080i | 4 |
-rw-r--r-- | tests/ref/vsynth/vsynth2-dnxhd-1080i | 4 |
5 files changed, 32 insertions, 9 deletions
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index 5fbf46b281..5ad6164e8f 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -629,14 +629,35 @@ static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx) static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) { DNXHDEncContext *ctx = avctx->priv_data; - int mb_y = jobnr, mb_x; + int mb_y = jobnr, mb_x, x, y; + int partial_last_row = (mb_y == ctx->m.mb_height - 1) && + ((avctx->height >> ctx->interlaced) & 0xF); + ctx = ctx->thread[threadnr]; if (ctx->cid_table->bit_depth == 8) { uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize); for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) { unsigned mb = mb_y * ctx->m.mb_width + mb_x; - int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize); - int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)sum*sum)>>8)+128)>>8; + int sum; + int varc; + + if (!partial_last_row && mb_x * 16 <= avctx->width - 16) { + sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize); + varc = ctx->m.dsp.pix_norm1(pix, ctx->m.linesize); + } else { + int bw = FFMIN(avctx->width - 16 * mb_x, 16); + int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16); + sum = varc = 0; + for (y = 0; y < bh; y++) { + for (x = 0; x < bw; x++) { + uint8_t val = pix[x + y * ctx->m.linesize]; + sum += val; + varc += val * val; + } + } + } + varc = (varc - (((unsigned)sum * sum) >> 8) + 128) >> 8; + ctx->mb_cmp[mb].value = varc; ctx->mb_cmp[mb].mb = mb; } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3c30f50b73..923de48736 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2309,8 +2309,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) const int bit_depth = avctx->bits_per_raw_sample; if (bit_depth == 10) { - // AVX implies !cache64. - // TODO: Port cache(32|64) detection from x264. 
if (CONFIG_H264CHROMA) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index d6c008ee2a..01172afbf1 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -611,6 +611,10 @@ void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) } if (EXTERNAL_AVX(mm_flags)) { + /* AVX implies 64 byte cache lines without the need to avoid unaligned + * memory accesses that cross the boundary between two cache lines. + * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid + * having to treat SSE2 functions with such properties as AVX. */ if (bit_depth == 10) { H264_QPEL_FUNCS_10(1, 0, sse2); H264_QPEL_FUNCS_10(2, 0, sse2); diff --git a/tests/ref/vsynth/vsynth1-dnxhd-1080i b/tests/ref/vsynth/vsynth1-dnxhd-1080i index 899ef9ee07..dbe2c371e0 100644 --- a/tests/ref/vsynth/vsynth1-dnxhd-1080i +++ b/tests/ref/vsynth/vsynth1-dnxhd-1080i @@ -1,4 +1,4 @@ -9a4781b0a052d9efaafbaf8893db9632 *tests/data/fate/vsynth1-dnxhd-1080i.mov +124c991ee3ac0caef39a58a45287a762 *tests/data/fate/vsynth1-dnxhd-1080i.mov 3031911 tests/data/fate/vsynth1-dnxhd-1080i.mov -e55bf857297ba4d911a9d17a984b125d *tests/data/fate/vsynth1-dnxhd-1080i.out.rawvideo +a09132c6db44f415e831dcaa630a351b *tests/data/fate/vsynth1-dnxhd-1080i.out.rawvideo stddev: 6.29 PSNR: 32.15 MAXDIFF: 64 bytes: 7603200/ 760320 diff --git a/tests/ref/vsynth/vsynth2-dnxhd-1080i b/tests/ref/vsynth/vsynth2-dnxhd-1080i index 874e60bc55..f657eb4c95 100644 --- a/tests/ref/vsynth/vsynth2-dnxhd-1080i +++ b/tests/ref/vsynth/vsynth2-dnxhd-1080i @@ -1,4 +1,4 @@ -93b878dcf8f2ecc9798d0e0885c9eec9 *tests/data/fate/vsynth2-dnxhd-1080i.mov +5d7ab75ce6e547ed63a7a0eacf18f078 *tests/data/fate/vsynth2-dnxhd-1080i.mov 3031911 tests/data/fate/vsynth2-dnxhd-1080i.mov -27edc8dfe2ca19097c7f9119705b3a60 *tests/data/fate/vsynth2-dnxhd-1080i.out.rawvideo 
+744ba46da5d4c19a28562ea31061d170 *tests/data/fate/vsynth2-dnxhd-1080i.out.rawvideo stddev: 1.31 PSNR: 45.77 MAXDIFF: 23 bytes: 7603200/ 760320 |