diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-07-22 11:56:53 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-07-22 12:08:52 +0200 |
commit | 4095fa903830f8395a26d6ee38c77ad6333a4f5e (patch) | |
tree | 828ada22309e543a181997b63c6ffca6868731ac /libavcodec | |
parent | 657eac048eb267d781de83849fe7616d29320832 (diff) | |
parent | bb32fded3623a20ff8999c2924315841c08c985c (diff) | |
download | ffmpeg-4095fa903830f8395a26d6ee38c77ad6333a4f5e.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
dnxhddec: optimise dnxhd_decode_dct_block()
rtp: remove disabled code
eac3enc: use different numbers of blocks per frame to allow higher bitrates
dnxhd: add regression test for 10-bit
dnxhd: 10-bit support
dsputil: update per-arch init funcs for non-h264 high bit depth
dsputil: template get_pixels() for different bit depths
dsputil: create 16/32-bit dctcoef versions of some functions
jfdctint: add 10-bit version
mov: add clcp type track as Subtitle stream.
mpeg4: add Mpeg4 Profiles names.
mpeg4: decode Level Profile for MPEG4 Part 2.
ffprobe: display bitstream level.
imgconvert: remove unused glue and xglue macros
Conflicts:
libavcodec/dsputil_template.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
39 files changed, 1279 insertions, 734 deletions
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index f45ed3c89b..df847b8a12 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -186,7 +186,7 @@ void ff_ac3_adjust_frame_size(AC3EncodeContext *s) s->frame_size = s->frame_size_min + 2 * (s->bits_written * s->sample_rate < s->samples_written * s->bit_rate); s->bits_written += s->frame_size * 8; - s->samples_written += AC3_FRAME_SIZE; + s->samples_written += AC3_BLOCK_SIZE * s->num_blocks; } @@ -198,7 +198,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s) /* set coupling use flags for each block/channel */ /* TODO: turn coupling on/off and adjust start band based on bit usage */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = 1; ch <= s->fbw_channels; ch++) block->channel_in_cpl[ch] = s->cpl_on; @@ -208,7 +208,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s) enabled for that block */ got_cpl_snr = 0; num_cpl_blocks = 0; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; block->num_cpl_channels = 0; for (ch = 1; ch <= s->fbw_channels; ch++) @@ -244,7 +244,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s) s->cpl_on = 0; /* set bandwidth for each channel */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = 1; ch <= s->fbw_channels; ch++) { if (block->channel_in_cpl[ch]) @@ -269,7 +269,7 @@ void ff_ac3_apply_rematrixing(AC3EncodeContext *s) if (!s->rematrixing_enabled) return; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; if (block->new_rematrixing_strategy) flags = block->rematrixing_flags; @@ -318,7 +318,7 @@ static av_cold void exponent_init(AC3EncodeContext *s) static void extract_exponents(AC3EncodeContext *s) { int ch = 
!s->cpl_on; - int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS * (s->channels - ch + 1); + int chan_size = AC3_MAX_COEFS * s->num_blocks * (s->channels - ch + 1); AC3Block *block = &s->blocks[0]; s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], chan_size); @@ -331,6 +331,15 @@ static void extract_exponents(AC3EncodeContext *s) */ #define EXP_DIFF_THRESHOLD 500 +/** + * Table used to select exponent strategy based on exponent reuse block interval. + */ +static const uint8_t exp_strategy_reuse_tab[4][6] = { + { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 }, + { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 }, + { EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15, EXP_D15 }, + { EXP_D45, EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15 } +}; /** * Calculate exponent strategies for all channels. @@ -349,7 +358,7 @@ static void compute_exp_strategy(AC3EncodeContext *s) reused in the next frame */ exp_strategy[0] = EXP_NEW; exp += AC3_MAX_COEFS; - for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) { + for (blk = 1; blk < s->num_blocks; blk++, exp += AC3_MAX_COEFS) { if (ch == CPL_CH) { if (!s->blocks[blk-1].cpl_in_use) { exp_strategy[blk] = EXP_NEW; @@ -373,23 +382,18 @@ static void compute_exp_strategy(AC3EncodeContext *s) /* now select the encoding strategy type : if exponents are often recoded, we use a coarse encoding */ blk = 0; - while (blk < AC3_MAX_BLOCKS) { + while (blk < s->num_blocks) { blk1 = blk + 1; - while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) + while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE) blk1++; - switch (blk1 - blk) { - case 1: exp_strategy[blk] = EXP_D45; break; - case 2: - case 3: exp_strategy[blk] = EXP_D25; break; - default: exp_strategy[blk] = EXP_D15; break; - } + exp_strategy[blk] = exp_strategy_reuse_tab[s->num_blks_code][blk1-blk-1]; blk = blk1; } } if (s->lfe_on) { ch = s->lfe_channel; s->exp_strategy[ch][0] = EXP_D15; - for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) + for 
(blk = 1; blk < s->num_blocks; blk++) s->exp_strategy[ch][blk] = EXP_REUSE; } @@ -487,7 +491,7 @@ static void encode_exponents(AC3EncodeContext *s) cpl = (ch == CPL_CH); blk = 0; - while (blk < AC3_MAX_BLOCKS) { + while (blk < s->num_blocks) { AC3Block *block = &s->blocks[blk]; if (cpl && !block->cpl_in_use) { exp += AC3_MAX_COEFS; @@ -500,7 +504,7 @@ static void encode_exponents(AC3EncodeContext *s) /* count the number of EXP_REUSE blocks after the current block and set exponent reference block numbers */ s->exp_ref_block[ch][blk] = blk; - while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) { + while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE) { s->exp_ref_block[ch][blk1] = blk; blk1++; } @@ -536,7 +540,7 @@ static void group_exponents(AC3EncodeContext *s) int exp0, exp1; bit_count = 0; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { int exp_strategy = s->exp_strategy[ch][blk]; @@ -625,30 +629,38 @@ static void count_frame_bits_fixed(AC3EncodeContext *s) if (s->eac3) { /* bitstream info header */ frame_bits += 35; - frame_bits += 1 + 1 + 1; + frame_bits += 1 + 1; + if (s->num_blocks != 0x6) + frame_bits++; + frame_bits++; /* audio frame header */ - frame_bits += 2; + if (s->num_blocks == 6) + frame_bits += 2; frame_bits += 10; /* exponent strategy */ if (s->use_frame_exp_strategy) frame_bits += 5 * s->fbw_channels; else - frame_bits += AC3_MAX_BLOCKS * 2 * s->fbw_channels; + frame_bits += s->num_blocks * 2 * s->fbw_channels; if (s->lfe_on) - frame_bits += AC3_MAX_BLOCKS; + frame_bits += s->num_blocks; /* converter exponent strategy */ - frame_bits += s->fbw_channels * 5; + if (s->num_blks_code != 0x3) + frame_bits++; + else + frame_bits += s->fbw_channels * 5; /* snr offsets */ frame_bits += 10; /* block start info */ - frame_bits++; + if (s->num_blocks != 1) + frame_bits++; } else { frame_bits += 49; 
frame_bits += frame_bits_inc[s->channel_mode]; } /* audio blocks */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { if (!s->eac3) { /* block switch flags */ frame_bits += s->fbw_channels; @@ -750,7 +762,7 @@ static void count_frame_bits(AC3EncodeContext *s) /* coupling */ if (s->channel_mode > AC3_CHMODE_MONO) { frame_bits++; - for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 1; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; frame_bits++; if (block->new_cpl_strategy) @@ -762,7 +774,7 @@ static void count_frame_bits(AC3EncodeContext *s) if (s->use_frame_exp_strategy) { frame_bits += 5 * s->cpl_on; } else { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) frame_bits += 2 * s->blocks[blk].cpl_in_use; } } @@ -778,7 +790,7 @@ static void count_frame_bits(AC3EncodeContext *s) } /* audio blocks */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; /* coupling strategy */ @@ -865,7 +877,7 @@ static void bit_alloc_masking(AC3EncodeContext *s) { int blk, ch; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { /* We only need psd and mask for calculating bap. 
@@ -901,9 +913,9 @@ static void reset_block_bap(AC3EncodeContext *s) ref_bap = s->bap_buffer; for (ch = 0; ch <= s->channels; ch++) { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk]; - ref_bap += AC3_MAX_COEFS * AC3_MAX_BLOCKS; + ref_bap += AC3_MAX_COEFS * s->num_blocks; } s->ref_bap_set = 1; } @@ -936,7 +948,7 @@ static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch, { int blk; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; if (ch == CPL_CH && !block->cpl_in_use) continue; @@ -980,7 +992,7 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset) snr_offset = (snr_offset - 240) << 2; reset_block_bap(s); - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = !block->cpl_in_use; ch <= s->channels; ch++) { @@ -1194,7 +1206,7 @@ void ff_ac3_quantize_mantissas(AC3EncodeContext *s) { int blk, ch, ch0=0, got_cpl; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; AC3Mant m = { 0 }; @@ -1557,7 +1569,7 @@ void ff_ac3_output_frame(AC3EncodeContext *s, unsigned char *frame) s->output_frame_header(s); - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) output_audio_block(s, blk); output_frame_end(s); @@ -1585,6 +1597,7 @@ static void dprint_options(AC3EncodeContext *s) av_dlog(avctx, "channel_layout: %s\n", strbuf); av_dlog(avctx, "sample_rate: %d\n", s->sample_rate); av_dlog(avctx, "bit_rate: %d\n", s->bit_rate); + av_dlog(avctx, "blocks/frame: %d (code=%d)\n", s->num_blocks, s->num_blks_code); if (s->cutoff) av_dlog(avctx, "cutoff: %d\n", s->cutoff); @@ -1851,7 +1864,7 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx) av_freep(&s->qmant_buffer); 
av_freep(&s->cpl_coord_exp_buffer); av_freep(&s->cpl_coord_mant_buffer); - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; av_freep(&block->mdct_coef); av_freep(&block->fixed_coef); @@ -1958,18 +1971,30 @@ static av_cold int validate_options(AC3EncodeContext *s) /* validate bit rate */ if (s->eac3) { int max_br, min_br, wpf, min_br_dist, min_br_code; + int num_blks_code, num_blocks, frame_samples; /* calculate min/max bitrate */ - max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16; - min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16; + /* TODO: More testing with 3 and 2 blocks. All E-AC-3 samples I've + found use either 6 blocks or 1 block, even though 2 or 3 blocks + would work as far as the bit rate is concerned. */ + for (num_blks_code = 3; num_blks_code >= 0; num_blks_code--) { + num_blocks = ((int[]){ 1, 2, 3, 6 })[num_blks_code]; + frame_samples = AC3_BLOCK_SIZE * num_blocks; + max_br = 2048 * s->sample_rate / frame_samples * 16; + min_br = ((s->sample_rate + (frame_samples-1)) / frame_samples) * 16; + if (avctx->bit_rate <= max_br) + break; + } if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) { av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d " "for this sample rate\n", min_br, max_br); return AVERROR(EINVAL); } + s->num_blks_code = num_blks_code; + s->num_blocks = num_blocks; /* calculate words-per-frame for the selected bitrate */ - wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate; + wpf = (avctx->bit_rate / 16) * frame_samples / s->sample_rate; av_assert1(wpf > 0 && wpf <= 2048); /* find the closest AC-3 bitrate code to the selected bitrate. 
@@ -2001,6 +2026,8 @@ static av_cold int validate_options(AC3EncodeContext *s) } s->frame_size_code = i << 1; s->frame_size_min = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code]; + s->num_blks_code = 0x3; + s->num_blocks = 6; } s->bit_rate = avctx->bit_rate; s->frame_size = s->frame_size_min; @@ -2065,13 +2092,13 @@ static av_cold void set_bandwidth(AC3EncodeContext *s) /* set number of coefficients for each channel */ for (ch = 1; ch <= s->fbw_channels; ch++) { s->start_freq[ch] = 0; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73; } /* LFE channel always has 7 coefs */ if (s->lfe_on) { s->start_freq[s->lfe_channel] = 0; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) s->blocks[blk].end_freq[ch] = 7; } @@ -2108,7 +2135,7 @@ static av_cold void set_bandwidth(AC3EncodeContext *s) s->start_freq[CPL_CH] = cpl_start_band * 12 + 37; s->cpl_end_freq = cpl_end_band * 12 + 37; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq; } } @@ -2119,35 +2146,37 @@ static av_cold int allocate_buffers(AC3EncodeContext *s) AVCodecContext *avctx = s->avctx; int blk, ch; int channels = s->channels + 1; /* includes coupling channel */ + int channel_blocks = channels * s->num_blocks; + int total_coefs = AC3_MAX_COEFS * channel_blocks; if (s->allocate_sample_buffers(s)) goto alloc_fail; - FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * channels * - AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels * - AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail); - FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels * - AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels * - 
AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels * - 128 * sizeof(*s->grouped_exp_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels * - AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels * - 64 * sizeof(*s->band_psd_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels * - 64 * sizeof(*s->mask_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels * - AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, total_coefs * + sizeof(*s->bap_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, total_coefs * + sizeof(*s->bap1_buffer), alloc_fail); + FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, total_coefs * + sizeof(*s->mdct_coef_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, total_coefs * + sizeof(*s->exp_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, channel_blocks * 128 * + sizeof(*s->grouped_exp_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, total_coefs * + sizeof(*s->psd_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, channel_blocks * 64 * + sizeof(*s->band_psd_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, channel_blocks * 64 * + sizeof(*s->mask_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, total_coefs * + sizeof(*s->qmant_buffer), alloc_fail); if (s->cpl_enabled) { - FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels * - 16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail); - FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels * - 16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, channel_blocks * 16 * + sizeof(*s->cpl_coord_exp_buffer), 
alloc_fail); + FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, channel_blocks * 16 * + sizeof(*s->cpl_coord_mant_buffer), alloc_fail); } - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef), alloc_fail); @@ -2183,23 +2212,23 @@ static av_cold int allocate_buffers(AC3EncodeContext *s) } /* arrangement: channel, block, coeff */ - block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; - block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; + block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)]; + block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)]; } } if (!s->fixed_point) { - FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels * - AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail); - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, total_coefs * + sizeof(*s->fixed_coef_buffer), alloc_fail); + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels * sizeof(*block->fixed_coef), alloc_fail); for (ch = 0; ch < channels; ch++) - block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; + block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)]; } } else { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels * sizeof(*block->fixed_coef), alloc_fail); @@ -2226,14 +2255,14 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx) s->eac3 = avctx->codec_id == CODEC_ID_EAC3; - avctx->frame_size = AC3_FRAME_SIZE; - ff_ac3_common_init(); ret = validate_options(s); if 
(ret) return ret; + avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks; + s->bitstream_mode = avctx->audio_service_type; if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE) s->bitstream_mode = 0x7; diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h index f00f1cf190..af104b60dd 100644 --- a/libavcodec/ac3enc.h +++ b/libavcodec/ac3enc.h @@ -152,6 +152,8 @@ typedef struct AC3EncodeContext { int bit_rate; ///< target bit rate, in bits-per-second int sample_rate; ///< sampling frequency, in Hz + int num_blks_code; ///< number of blocks code (numblkscod) + int num_blocks; ///< number of blocks per frame int frame_size_min; ///< minimum frame size in case rounding is necessary int frame_size; ///< current frame size in bytes int frame_size_code; ///< frame size code (frmsizecod) diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index 61536971d1..906b0a594e 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -93,7 +93,7 @@ static void scale_coefficients(AC3EncodeContext *s) { int blk, ch; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = 1; ch <= s->channels; ch++) { s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS, diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index fa19a210b0..cb75314164 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -103,7 +103,7 @@ static int normalize_samples(AC3EncodeContext *s) */ static void scale_coefficients(AC3EncodeContext *s) { - int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS; + int chan_size = AC3_MAX_COEFS * s->num_blocks; s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size, s->mdct_coef_buffer + chan_size, chan_size * s->channels); diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c index 9b9151b3e0..c4e2a121bf 100644 --- a/libavcodec/ac3enc_template.c +++ b/libavcodec/ac3enc_template.c @@ -79,13 +79,13 @@ static void 
deinterleave_input_samples(AC3EncodeContext *s, int sinc; /* copy last 256 samples of previous frame to the start of the current frame */ - memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE], + memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_BLOCK_SIZE * s->num_blocks], AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0])); /* deinterleave */ sinc = s->channels; sptr = samples + s->channel_map[ch]; - for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) { + for (i = AC3_BLOCK_SIZE; i < AC3_BLOCK_SIZE * (s->num_blocks + 1); i++) { s->planar_samples[ch][i] = *sptr; sptr += sinc; } @@ -103,7 +103,7 @@ static void apply_mdct(AC3EncodeContext *s) int blk, ch; for (ch = 0; ch < s->channels; ch++) { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE]; @@ -159,7 +159,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) cpl_start = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs; /* calculate coupling channel from fbw channels */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start]; if (!block->cpl_in_use) @@ -188,7 +188,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) while (i < s->cpl_end_freq) { int band_size = s->cpl_band_sizes[bnd]; for (ch = CPL_CH; ch <= s->fbw_channels; ch++) { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch])) continue; @@ -203,7 +203,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) } /* determine which blocks to send new coupling coordinates for */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { 
AC3Block *block = &s->blocks[blk]; AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL; int new_coords = 0; @@ -261,7 +261,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) coordinates in successive blocks */ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) { blk = 0; - while (blk < AC3_MAX_BLOCKS) { + while (blk < s->num_blocks) { int blk1; CoefSumType energy_cpl; AC3Block *block = &s->blocks[blk]; @@ -273,7 +273,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) energy_cpl = energy[blk][CPL_CH][bnd]; blk1 = blk+1; - while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) { + while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) { if (s->blocks[blk1].cpl_in_use) energy_cpl += energy[blk1][CPL_CH][bnd]; blk1++; @@ -285,7 +285,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) continue; energy_ch = energy[blk][ch][bnd]; blk1 = blk+1; - while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) { + while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) { if (s->blocks[blk1].cpl_in_use) energy_ch += energy[blk1][ch][bnd]; blk1++; @@ -297,7 +297,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) } /* calculate exponents/mantissas for coupling coordinates */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; if (!block->cpl_in_use || !block->new_cpl_coords) continue; @@ -362,7 +362,7 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s) if (s->channel_mode != AC3_CHMODE_STEREO) return; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { block = &s->blocks[blk]; block->new_rematrixing_strategy = !blk; @@ -440,7 +440,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame, scale_coefficients(s); clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], - AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels); + AC3_MAX_COEFS * s->num_blocks * s->channels); s->cpl_on = 
s->cpl_enabled; ff_ac3_compute_coupling_strategy(s); diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index d0d2a621c2..d8f999dfdc 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -270,7 +270,7 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; @@ -321,7 +321,8 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) c->put_pixels_clamped = put_pixels_clamped_mvi_asm; c->add_pixels_clamped = add_pixels_clamped_mvi_asm; - c->get_pixels = get_pixels_mvi; + if (!high_bit_depth) + c->get_pixels = get_pixels_mvi; c->diff_pixels = diff_pixels_mvi; c->sad[0] = pix_abs16x16_mvi_asm; c->sad[1] = pix_abs8x8_mvi; diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 5728d709e7..ccbe1ed296 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -75,7 +75,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block) void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index 1fc636b12d..fb0d00973e 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -72,7 +72,7 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size); void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = 
avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && (avctx->idct_algo == FF_IDCT_AUTO || @@ -106,8 +106,9 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6; } + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_armv6; c->add_pixels_clamped = ff_add_pixels_clamped_armv6; - c->get_pixels = ff_get_pixels_armv6; c->diff_pixels = ff_diff_pixels_armv6; c->pix_abs[0][0] = ff_pix_abs16_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index c8b6b6ebae..3b0de32a14 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -175,7 +175,7 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) { if (avctx->idct_algo == FF_IDCT_AUTO || diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c index 85be83148a..2837af119f 100644 --- a/libavcodec/arm/dsputil_iwmmxt.c +++ b/libavcodec/arm/dsputil_iwmmxt.c @@ -155,7 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) { int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */ - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 9e5cbd52ff..c854958c60 100644 --- a/libavcodec/avcodec.h +++ 
b/libavcodec/avcodec.h @@ -2278,6 +2278,23 @@ typedef struct AVCodecContext { #define FF_PROFILE_VC1_COMPLEX 2 #define FF_PROFILE_VC1_ADVANCED 3 +#define FF_PROFILE_MPEG4_SIMPLE 0 +#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE 1 +#define FF_PROFILE_MPEG4_CORE 2 +#define FF_PROFILE_MPEG4_MAIN 3 +#define FF_PROFILE_MPEG4_N_BIT 4 +#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE 5 +#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION 6 +#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE 7 +#define FF_PROFILE_MPEG4_HYBRID 8 +#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME 9 +#define FF_PROFILE_MPEG4_CORE_SCALABLE 10 +#define FF_PROFILE_MPEG4_ADVANCED_CODING 11 +#define FF_PROFILE_MPEG4_ADVANCED_CORE 12 +#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13 +#define FF_PROFILE_MPEG4_SIMPLE_STUDIO 14 +#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE 15 + /** * level * - encoding: Set by user. diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c index eade153440..bfcc337388 100644 --- a/libavcodec/bfin/dsputil_bfin.c +++ b/libavcodec/bfin/dsputil_bfin.c @@ -197,14 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->get_pixels = ff_bfin_get_pixels; c->diff_pixels = ff_bfin_diff_pixels; c->put_pixels_clamped = ff_bfin_put_pixels_clamped; c->add_pixels_clamped = ff_bfin_add_pixels_clamped; if (!high_bit_depth) + c->get_pixels = ff_bfin_get_pixels; c->clear_blocks = bfin_clear_blocks; c->pix_sum = ff_bfin_pix_sum; c->pix_norm1 = ff_bfin_pix_norm1; @@ -253,10 +253,10 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) /* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */ } - if (avctx->dct_algo == FF_DCT_AUTO) - c->fdct = ff_bfin_fdct; - if (avctx->bits_per_raw_sample <= 8) { + if 
(avctx->dct_algo == FF_DCT_AUTO) + c->fdct = ff_bfin_fdct; + if (avctx->idct_algo == FF_IDCT_VP3) { c->idct_permutation_type = FF_NO_IDCT_PERM; c->idct = ff_bfin_vp3_idct; diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index de6582ca08..9e1e99672b 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -88,7 +88,7 @@ static const struct algo fdct_tab[] = { { "REF-DBL", ff_ref_fdct, NO_PERM }, { "FAAN", ff_faandct, FAAN_SCALE }, { "IJG-AAN-INT", fdct_ifast, SCALE_PERM }, - { "IJG-LLM-INT", ff_jpeg_fdct_islow, NO_PERM }, + { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM }, #if HAVE_MMX { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c index d4aefeee12..536636df94 100644 --- a/libavcodec/dnxhddata.c +++ b/libavcodec/dnxhddata.c @@ -22,6 +22,28 @@ #include "avcodec.h" #include "dnxhddata.h" +static const uint8_t dnxhd_1235_luma_weight[] = { + 0, 32, 32, 32, 33, 35, 38, 39, + 32, 33, 32, 33, 36, 36, 39, 42, + 32, 32, 33, 36, 35, 37, 41, 43, + 31, 33, 34, 36, 36, 40, 42, 48, + 32, 34, 36, 37, 39, 42, 46, 51, + 36, 37, 37, 39, 41, 46, 51, 55, + 37, 39, 41, 41, 47, 50, 55, 56, + 41, 42, 41, 44, 50, 53, 60, 60 +}; + +static const uint8_t dnxhd_1235_chroma_weight[] = { + 0, 32, 33, 34, 39, 41, 54, 59, + 33, 34, 35, 38, 43, 49, 58, 84, + 34, 37, 39, 44, 46, 55, 74, 87, + 40, 42, 47, 48, 58, 70, 87, 86, + 43, 50, 56, 63, 72, 94, 91, 82, + 55, 63, 65, 75, 93, 89, 85, 73, + 61, 67, 82, 81, 83, 90, 79, 73, + 74, 84, 75, 78, 90, 85, 73, 73 +}; + static const uint8_t dnxhd_1237_luma_weight[] = { 0, 32, 33, 34, 34, 36, 37, 36, 36, 37, 38, 38, 38, 39, 41, 44, @@ -132,6 +154,28 @@ static const uint8_t dnxhd_1243_chroma_weight[] = { 46, 45, 46, 47, 47, 48, 47, 47, }; +static const uint8_t dnxhd_1250_luma_weight[] = { + 0, 32, 35, 35, 36, 36, 41, 43, + 32, 34, 35, 36, 37, 39, 43, 47, + 33, 34, 36, 38, 38, 42, 42, 50, + 34, 36, 38, 38, 41, 40, 47, 54, + 35, 38, 39, 40, 39, 45, 49, 58, + 38, 39, 40, 39, 46, 
47, 54, 60, + 38, 39, 41, 46, 46, 48, 57, 62, + 40, 41, 44, 45, 49, 54, 63, 63 +}; + +static const uint8_t dnxhd_1250_chroma_weight[] = { + 0, 32, 35, 36, 40, 42, 51, 51, + 35, 36, 39, 39, 43, 51, 52, 55, + 36, 41, 41, 43, 51, 53, 54, 56, + 43, 44, 45, 50, 54, 54, 55, 57, + 45, 48, 50, 51, 55, 58, 59, 58, + 49, 52, 49, 57, 58, 62, 58, 60, + 51, 51, 56, 58, 62, 61, 59, 62, + 52, 52, 60, 61, 59, 59, 63, 63 +}; + static const uint8_t dnxhd_1251_luma_weight[] = { 0, 32, 32, 34, 34, 34, 34, 35, 35, 35, 36, 37, 36, 36, 35, 36, @@ -604,6 +648,146 @@ static const uint8_t dnxhd_1235_1241_run[62] = { 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, }; +static const uint8_t dnxhd_1250_dc_codes[14] = { + 10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127 +}; +static const uint8_t dnxhd_1250_dc_bits[14] = { + 4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7 +}; +static const uint16_t dnxhd_1250_ac_codes[257] = { + 0, 1, 4, 10, 11, 24, 25, 26, + 54, 55, 56, 57, 116, 117, 118, 119, + 240, 241, 242, 243, 244, 245, 492, 493, + 494, 495, 496, 497, 498, 998, 999, 1000, + 1001, 1002, 1003, 1004, 1005, 1006, 2014, 2015, + 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, + 2024, 2025, 4052, 4053, 4054, 4055, 4056, 4057, + 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, + 4066, 4067, 8136, 8137, 8138, 8139, 8140, 8141, + 8142, 8143, 8144, 8145, 8146, 8147, 8148, 8149, + 8150, 8151, 8152, 8153, 8154, 8155, 8156, 16314, + 16315, 16316, 16317, 16318, 16319, 16320, 16321, 16322, + 16323, 16324, 16325, 16326, 16327, 16328, 16329, 16330, + 16331, 16332, 16333, 16334, 16335, 16336, 16337, 16338, + 32678, 32679, 32680, 32681, 32682, 32683, 32684, 32685, + 32686, 32687, 32688, 32689, 32690, 32691, 32692, 32693, + 32694, 32695, 32696, 32697, 32698, 32699, 32700, 32701, + 32702, 32703, 32704, 32705, 32706, 32707, 32708, 32709, + 32710, 32711, 32712, 65426, 65427, 65428, 65429, 65430, + 65431, 65432, 65433, 65434, 65435, 65436, 65437, 65438, + 65439, 65440, 65441, 65442, 65443, 65444, 65445, 65446, + 
65447, 65448, 65449, 65450, 65451, 65452, 65453, 65454, + 65455, 65456, 65457, 65458, 65459, 65460, 65461, 65462, + 65463, 65464, 65465, 65466, 65467, 65468, 65469, 65470, + 65471, 65472, 65473, 65474, 65475, 65476, 65477, 65478, + 65479, 65480, 65481, 65482, 65483, 65484, 65485, 65486, + 65487, 65488, 65489, 65490, 65491, 65492, 65493, 65494, + 65495, 65496, 65497, 65498, 65499, 65500, 65501, 65502, + 65503, 65504, 65505, 65506, 65507, 65508, 65509, 65510, + 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65518, + 65519, 65520, 65521, 65522, 65523, 65524, 65525, 65526, + 65527, 65528, 65529, 65530, 65531, 65532, 65533, 65534, + 65535 +}; +static const uint8_t dnxhd_1250_ac_bits[257] = { + 2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16 +}; +static const uint8_t dnxhd_1250_ac_level[257] = { + 1, 1, 2, 3, 0, 4, 5, 2, 6, 7, 8, 3, 9, 10, 11, 4, + 12, 13, 14, 15, 16, 5, 17, 18, 19, 20, 21, 22, 6, 23, 24, 25, + 26, 27, 28, 29, 7, 8, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 9, 10, 40, 41, 42, 43, 44, 45, 
46, 47, 48, 49, 50, 51, 52, 11, + 12, 13, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2, + 3, 4, 5, 14, 15, 16, 17, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 18, 19, 20, 21, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 22, 23, 24, + 25, 26, 27, 54, 57, 58, 59, 60, 61, 62, 63, 64, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64 +}; +static const uint8_t dnxhd_1250_ac_run_flag[257] = { + 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; +static const uint8_t dnxhd_1250_ac_index_flag[257] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 0, 0, 
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; +static const uint16_t dnxhd_1250_run_codes[62] = { + 0, 4, 5, 12, 26, 27, 28, 58, + 118, 119, 120, 242, 486, 487, 976, 977, + 978, 979, 980, 981, 982, 983, 984, 985, + 986, 987, 988, 989, 990, 991, 992, 993, + 994, 995, 996, 997, 998, 999, 1000, 1001, + 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, + 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, + 1018, 1019, 1020, 1021, 1022, 1023 +}; +static const uint8_t dnxhd_1250_run_bits[62] = { + 1, 3, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; +static const uint8_t dnxhd_1250_run[62] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62 +}; + static const uint8_t dnxhd_1251_dc_codes[12] = { 0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63, }; @@ -878,6 +1062,13 @@ static const uint8_t dnxhd_1252_ac_index_flag[257] = { }; const CIDEntry ff_dnxhd_cid_table[] = { + { 1235, 1920, 1080, 0, 917504, 917504, 6, 10, + dnxhd_1235_luma_weight, dnxhd_1235_chroma_weight, + dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits, + dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level, + 
dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag, + dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run, + { 175, 185, 365, 440 } }, { 1237, 1920, 1080, 0, 606208, 606208, 4, 8, dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight, dnxhd_1237_dc_codes, dnxhd_1237_dc_bits, @@ -913,6 +1104,13 @@ const CIDEntry ff_dnxhd_cid_table[] = { dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag, dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run, { 185, 220 } }, + { 1250, 1280, 720, 0, 458752, 458752, 6, 10, + dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight, + dnxhd_1250_dc_codes, dnxhd_1250_dc_bits, + dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_level, + dnxhd_1250_ac_run_flag, dnxhd_1250_ac_index_flag, + dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run, + { 90, 180, 220 } }, { 1251, 1280, 720, 0, 458752, 458752, 4, 8, dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight, dnxhd_1251_dc_codes, dnxhd_1251_dc_bits, @@ -945,7 +1143,7 @@ int ff_dnxhd_get_cid_table(int cid) return -1; } -int ff_dnxhd_find_cid(AVCodecContext *avctx) +int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth) { int i, j; int mbs = avctx->bit_rate/1000000; @@ -955,7 +1153,7 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx) const CIDEntry *cid = &ff_dnxhd_cid_table[i]; if (cid->width == avctx->width && cid->height == avctx->height && cid->interlaced == !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT) && - cid->bit_depth == 8) { // until 10 bit is supported + cid->bit_depth == bit_depth) { for (j = 0; j < sizeof(cid->bit_rates); j++) { if (cid->bit_rates[j] == mbs) return cid->cid; diff --git a/libavcodec/dnxhddata.h b/libavcodec/dnxhddata.h index 32c77db0ef..4d03a600f4 100644 --- a/libavcodec/dnxhddata.h +++ b/libavcodec/dnxhddata.h @@ -46,6 +46,6 @@ typedef struct { extern const CIDEntry ff_dnxhd_cid_table[]; int ff_dnxhd_get_cid_table(int cid); -int ff_dnxhd_find_cid(AVCodecContext *avctx); +int ff_dnxhd_find_cid(AVCodecContext 
*avctx, int bit_depth); #endif /* AVCODEC_DNXHDDATA_H */ diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c index 43c4679f69..a7ad620fd8 100644 --- a/libavcodec/dnxhddec.c +++ b/libavcodec/dnxhddec.c @@ -1,6 +1,9 @@ /* * VC3/DNxHD decoder. * Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com> + * Copyright (c) 2011 MirriAd Ltd + * + * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com> * * This file is part of FFmpeg. * @@ -28,7 +31,7 @@ #include "dnxhddata.h" #include "dsputil.h" -typedef struct { +typedef struct DNXHDContext { AVCodecContext *avctx; AVFrame picture; GetBitContext gb; @@ -43,17 +46,22 @@ typedef struct { DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64]; ScanTable scantable; const CIDEntry *cid_table; + int bit_depth; // 8, 10 or 0 if not initialized at all. + void (*decode_dct_block)(struct DNXHDContext *ctx, DCTELEM *block, + int n, int qscale); } DNXHDContext; #define DNXHD_VLC_BITS 9 #define DNXHD_DC_VLC_BITS 7 +static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block, int n, int qscale); +static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block, int n, int qscale); + static av_cold int dnxhd_decode_init(AVCodecContext *avctx) { DNXHDContext *ctx = avctx->priv_data; ctx->avctx = avctx; - dsputil_init(&ctx->dsp, avctx); avctx->coded_frame = &ctx->picture; avcodec_get_frame_defaults(&ctx->picture); ctx->picture.type = AV_PICTURE_TYPE_I; @@ -79,7 +87,7 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid) init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257, ctx->cid_table->ac_bits, 1, 1, ctx->cid_table->ac_codes, 2, 2, 0); - init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->cid_table->bit_depth+4, + init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->bit_depth + 4, ctx->cid_table->dc_bits, 1, 1, ctx->cid_table->dc_codes, 1, 1, 0); init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62, @@ -117,8 +125,21 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t 
*buf, int buf_si av_dlog(ctx->avctx, "width %d, heigth %d\n", ctx->width, ctx->height); if (buf[0x21] & 0x40) { - av_log(ctx->avctx, AV_LOG_ERROR, "10 bit per component\n"); - return -1; + ctx->avctx->pix_fmt = PIX_FMT_YUV422P10; + ctx->avctx->bits_per_raw_sample = 10; + if (ctx->bit_depth != 10) { + dsputil_init(&ctx->dsp, ctx->avctx); + ctx->bit_depth = 10; + ctx->decode_dct_block = dnxhd_decode_dct_block_10; + } + } else { + ctx->avctx->pix_fmt = PIX_FMT_YUV422P; + ctx->avctx->bits_per_raw_sample = 8; + if (ctx->bit_depth != 8) { + dsputil_init(&ctx->dsp, ctx->avctx); + ctx->bit_depth = 8; + ctx->decode_dct_block = dnxhd_decode_dct_block_8; + } } cid = AV_RB32(buf + 0x28); @@ -158,79 +179,103 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si return 0; } -static int dnxhd_decode_dc(DNXHDContext *ctx) +static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx, + DCTELEM *block, int n, + int qscale, + int index_bits, + int level_bias, + int level_shift) { - int len; - - len = get_vlc2(&ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1); - return len ? 
get_xbits(&ctx->gb, len) : 0; -} - -static void dnxhd_decode_dct_block(DNXHDContext *ctx, DCTELEM *block, int n, int qscale) -{ - int i, j, index, index2; + int i, j, index1, index2, len; int level, component, sign; - const uint8_t *weigth_matrix; + const uint8_t *weight_matrix; + OPEN_READER(bs, &ctx->gb); if (n&2) { component = 1 + (n&1); - weigth_matrix = ctx->cid_table->chroma_weight; + weight_matrix = ctx->cid_table->chroma_weight; } else { component = 0; - weigth_matrix = ctx->cid_table->luma_weight; + weight_matrix = ctx->cid_table->luma_weight; } - ctx->last_dc[component] += dnxhd_decode_dc(ctx); + UPDATE_CACHE(bs, &ctx->gb); + GET_VLC(len, bs, &ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1); + if (len) { + level = GET_CACHE(bs, &ctx->gb); + LAST_SKIP_BITS(bs, &ctx->gb, len); + sign = ~level >> 31; + level = (NEG_USR32(sign ^ level, len) ^ sign) - sign; + ctx->last_dc[component] += level; + } block[0] = ctx->last_dc[component]; //av_log(ctx->avctx, AV_LOG_DEBUG, "dc %d\n", block[0]); + for (i = 1; ; i++) { - index = get_vlc2(&ctx->gb, ctx->ac_vlc.table, DNXHD_VLC_BITS, 2); - //av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index); - level = ctx->cid_table->ac_level[index]; + UPDATE_CACHE(bs, &ctx->gb); + GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table, + DNXHD_VLC_BITS, 2); + //av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index1); + level = ctx->cid_table->ac_level[index1]; if (!level) { /* EOB */ //av_log(ctx->avctx, AV_LOG_DEBUG, "EOB\n"); - return; + break; } - sign = get_sbits(&ctx->gb, 1); - if (ctx->cid_table->ac_index_flag[index]) { - level += get_bits(&ctx->gb, ctx->cid_table->index_bits)<<6; + sign = SHOW_SBITS(bs, &ctx->gb, 1); + SKIP_BITS(bs, &ctx->gb, 1); + + if (ctx->cid_table->ac_index_flag[index1]) { + level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 6; + SKIP_BITS(bs, &ctx->gb, index_bits); } - if (ctx->cid_table->ac_run_flag[index]) { - index2 = get_vlc2(&ctx->gb, ctx->run_vlc.table, DNXHD_VLC_BITS, 2); + if 
(ctx->cid_table->ac_run_flag[index1]) { + UPDATE_CACHE(bs, &ctx->gb); + GET_VLC(index2, bs, &ctx->gb, ctx->run_vlc.table, + DNXHD_VLC_BITS, 2); i += ctx->cid_table->run[index2]; } if (i > 63) { av_log(ctx->avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", n, i); - return; + break; } j = ctx->scantable.permutated[i]; //av_log(ctx->avctx, AV_LOG_DEBUG, "j %d\n", j); - //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weigth %d\n", level, weigth_matrix[i]); - level = (2*level+1) * qscale * weigth_matrix[i]; - if (ctx->cid_table->bit_depth == 10) { - if (weigth_matrix[i] != 8) - level += 8; - level >>= 4; - } else { - if (weigth_matrix[i] != 32) - level += 32; - level >>= 6; - } + //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weight %d\n", level, weight_matrix[i]); + level = (2*level+1) * qscale * weight_matrix[i]; + if (weight_matrix[i] != level_bias) + level += level_bias; + level >>= level_shift; + //av_log(NULL, AV_LOG_DEBUG, "i %d, j %d, end level %d\n", i, j, level); block[j] = (level^sign) - sign; } + + CLOSE_READER(bs, &ctx->gb); +} + +static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block, + int n, int qscale) +{ + dnxhd_decode_dct_block(ctx, block, n, qscale, 4, 32, 6); +} + +static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block, + int n, int qscale) +{ + dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 8, 4); } static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y) { + int shift1 = ctx->bit_depth == 10; int dct_linesize_luma = ctx->picture.linesize[0]; int dct_linesize_chroma = ctx->picture.linesize[1]; uint8_t *dest_y, *dest_u, *dest_v; - int dct_offset; + int dct_y_offset, dct_x_offset; int qscale, i; qscale = get_bits(&ctx->gb, 11); @@ -239,7 +284,7 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y) for (i = 0; i < 8; i++) { ctx->dsp.clear_block(ctx->blocks[i]); - dnxhd_decode_dct_block(ctx, ctx->blocks[i], i, qscale); + ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); } if 
(ctx->picture.interlaced_frame) { @@ -247,9 +292,9 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y) dct_linesize_chroma <<= 1; } - dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma) << 4) + (x << 4); - dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << 3); - dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << 3); + dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma) << 4) + (x << (4 + shift1)); + dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1)); + dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1)); if (ctx->cur_field) { dest_y += ctx->picture.linesize[0]; @@ -257,18 +302,19 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y) dest_v += ctx->picture.linesize[2]; } - dct_offset = dct_linesize_luma << 3; - ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]); - ctx->dsp.idct_put(dest_y + 8, dct_linesize_luma, ctx->blocks[1]); - ctx->dsp.idct_put(dest_y + dct_offset, dct_linesize_luma, ctx->blocks[4]); - ctx->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize_luma, ctx->blocks[5]); + dct_y_offset = dct_linesize_luma << 3; + dct_x_offset = 8 << shift1; + ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]); + ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]); + ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]); + ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]); if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) { - dct_offset = dct_linesize_chroma << 3; - ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]); - ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]); - ctx->dsp.idct_put(dest_u + dct_offset, dct_linesize_chroma, ctx->blocks[6]); - ctx->dsp.idct_put(dest_v + dct_offset, dct_linesize_chroma, ctx->blocks[7]); + dct_y_offset = dct_linesize_chroma << 3; + 
ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]); + ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]); + ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]); + ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]); } return 0; @@ -280,7 +326,7 @@ static int dnxhd_decode_macroblocks(DNXHDContext *ctx, const uint8_t *buf, int b for (y = 0; y < ctx->mb_height; y++) { ctx->last_dc[0] = ctx->last_dc[1] = - ctx->last_dc[2] = 1<<(ctx->cid_table->bit_depth+2); // for levels +2^(bitdepth-1) + ctx->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1) init_get_bits(&ctx->gb, buf + ctx->mb_scan_index[y], (buf_size - ctx->mb_scan_index[y]) << 3); for (x = 0; x < ctx->mb_width; x++) { //START_TIMER; @@ -313,7 +359,6 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data, int *data_size, first_field = 1; } - avctx->pix_fmt = PIX_FMT_YUV422P; if (av_image_check_size(ctx->width, ctx->height, 0, avctx)) return -1; avcodec_set_dimensions(avctx, ctx->width, ctx->height); diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index 57ae8e205b..c29144a964 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -1,8 +1,10 @@ /* * VC3/DNxHD encoder * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com> + * Copyright (c) 2011 MirriAd Ltd * * VC-3 encoder funded by the British Broadcasting Corporation + * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com> * * This file is part of FFmpeg. * @@ -32,6 +34,7 @@ #include "dnxhdenc.h" #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM +#define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples. 
static const AVOption options[]={ {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE}, @@ -41,7 +44,7 @@ static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL #define LAMBDA_FRAC_BITS 10 -static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size) +static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; for (i = 0; i < 4; i++) { @@ -58,6 +61,43 @@ static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, memcpy(block + 24, block - 32, sizeof(*block) * 8); } +static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size) +{ + int i; + + block += 32; + + for (i = 0; i < 4; i++) { + memcpy(block + i * 8, pixels + i * line_size, 8 * sizeof(*block)); + memcpy(block - (i+1) * 8, pixels + i * line_size, 8 * sizeof(*block)); + } +} + +static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, DCTELEM *block, + int n, int qscale, int *overflow) +{ + const uint8_t *scantable= ctx->intra_scantable.scantable; + const int *qmat = ctx->q_intra_matrix[qscale]; + int last_non_zero = 0; + + ctx->dsp.fdct(block); + + // Divide by 4 with rounding, to compensate scaling of DCT coefficients + block[0] = (block[0] + 2) >> 2; + + for (int i = 1; i < 64; ++i) { + int j = scantable[i]; + int sign = block[j] >> 31; + int level = (block[j] ^ sign) - sign; + level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT; + block[j] = (level ^ sign) - sign; + if (level) + last_non_zero = i; + } + + return last_non_zero; +} + static int dnxhd_init_vlc(DNXHDEncContext *ctx) { int i, j, level, run; @@ -118,31 +158,55 @@ static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) // init first elem to 1 to avoid div by 0 in convert_matrix uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t* int qscale, 
i; + const uint8_t *luma_weight_table = ctx->cid_table->luma_weight; + const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight; FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail); FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail); - for (i = 1; i < 64; i++) { - int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; - weight_matrix[j] = ctx->cid_table->luma_weight[i]; - } - ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix, - ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1); - for (i = 1; i < 64; i++) { - int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; - weight_matrix[j] = ctx->cid_table->chroma_weight[i]; - } - ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix, - ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1); - for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { - for (i = 0; i < 64; i++) { - ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2; - ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2; - ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2; + if (ctx->cid_table->bit_depth == 8) { + for (i = 1; i < 64; i++) { + int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; + weight_matrix[j] = ctx->cid_table->luma_weight[i]; + } + ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix, + ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1); + for (i = 1; i < 64; i++) { + int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; + weight_matrix[j] = ctx->cid_table->chroma_weight[i]; + } + ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix, + 
ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1); + + for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { + for (i = 0; i < 64; i++) { + ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2; + ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2; + ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2; + } + } + } else { + // 10-bit + for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { + for (i = 1; i < 64; i++) { + int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; + + // The quantization formula from the VC-3 standard is: + // quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i])) + // Where p is 32 for 8-bit samples and 8 for 10-bit ones. + // The s factor compensates scaling of DCT coefficients done by the DCT routines, + // and therefore is not present in standard. It's 8 for 8-bit samples and 4 for 10-bit ones. + // We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be: + // ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i]) + // For 10-bit samples, p / s == 2 + ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * luma_weight_table[i]); + ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * chroma_weight_table[i]); + } } } + return 0; fail: return -1; @@ -165,10 +229,22 @@ static int dnxhd_init_rc(DNXHDEncContext *ctx) static int dnxhd_encode_init(AVCodecContext *avctx) { DNXHDEncContext *ctx = avctx->priv_data; - int i, index; + int i, index, bit_depth; + + switch (avctx->pix_fmt) { + case PIX_FMT_YUV422P: + bit_depth = 8; + break; + case PIX_FMT_YUV422P10: + bit_depth = 10; + break; + default: + av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n"); + return -1; + } - ctx->cid = ff_dnxhd_find_cid(avctx); - if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) { + ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth); + if (!ctx->cid) { av_log(avctx, AV_LOG_ERROR, "video 
parameters incompatible with DNxHD\n"); return -1; } @@ -181,15 +257,25 @@ static int dnxhd_encode_init(AVCodecContext *avctx) ctx->m.mb_intra = 1; ctx->m.h263_aic = 1; - ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4; + avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; dsputil_init(&ctx->m.dsp, avctx); ff_dct_common_init(&ctx->m); + if (!ctx->m.dct_quantize) + ctx->m.dct_quantize = dct_quantize_c; + + if (ctx->cid_table->bit_depth == 10) { + ctx->m.dct_quantize = dnxhd_10bit_dct_quantize; + ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym; + ctx->block_width_l2 = 4; + } else { + ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym; + ctx->block_width_l2 = 3; + } + #if HAVE_MMX ff_dnxhd_init_mmx(ctx); #endif - if (!ctx->m.dct_quantize) - ctx->m.dct_quantize = dct_quantize_c; ctx->m.mb_height = (avctx->height + 15) / 16; ctx->m.mb_width = (avctx->width + 15) / 16; @@ -255,7 +341,7 @@ static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf) AV_WB16(buf + 0x1a, avctx->width); // SPL AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced); // NAL - buf[0x21] = 0x38; // FIXME 8 bit per comp + buf[0x21] = ctx->cid_table->bit_depth == 10 ? 0x58 : 0x38; buf[0x22] = 0x88 + (ctx->interlaced<<2); AV_WB32(buf + 0x28, ctx->cid); // CID buf[0x2c] = ctx->interlaced ? 
0 : 0x80; @@ -321,15 +407,27 @@ static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *b if (level) { if (level < 0) { level = (1-2*level) * qscale * weight_matrix[i]; - if (weight_matrix[i] != 32) - level += 32; - level >>= 6; + if (ctx->cid_table->bit_depth == 10) { + if (weight_matrix[i] != 8) + level += 8; + level >>= 4; + } else { + if (weight_matrix[i] != 32) + level += 32; + level >>= 6; + } level = -level; } else { level = (2*level+1) * qscale * weight_matrix[i]; - if (weight_matrix[i] != 32) - level += 32; - level >>= 6; + if (ctx->cid_table->bit_depth == 10) { + if (weight_matrix[i] != 8) + level += 8; + level >>= 4; + } else { + if (weight_matrix[i] != 32) + level += 32; + level >>= 6; + } } block[j] = level; } @@ -364,22 +462,24 @@ static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *bl static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) { - const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4); - const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3); - const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3); + const int bs = ctx->block_width_l2; + const int bw = 1 << bs; + const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs+1); + const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); + const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); DSPContext *dsp = &ctx->m.dsp; - dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); - dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); + dsp->get_pixels(ctx->blocks[1], ptr_y + bw, 
ctx->m.linesize); + dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { if (ctx->interlaced) { - ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); - ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize); - ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); - ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); + ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); + ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); + ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); + ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); } else { dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]); @@ -387,10 +487,10 @@ static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, in dsp->clear_block(ctx->blocks[7]); } } else { - dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); - dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); + dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); + dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); + dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); } } @@ -417,7 +517,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i ctx->m.last_dc[0] = ctx->m.last_dc[1] = - ctx->m.last_dc[2] = 1024; + 
ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2); for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { unsigned mb = mb_y * ctx->m.mb_width + mb_x; @@ -440,6 +540,8 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i diff = block[0] - ctx->m.last_dc[n]; if (diff < 0) nbits = av_log2_16bit(-2*diff); else nbits = av_log2_16bit( 2*diff); + + assert(nbits < ctx->cid_table->bit_depth + 4); dc_bits += ctx->cid_table->dc_bits[nbits] + nbits; ctx->m.last_dc[n] = block[0]; @@ -465,7 +567,7 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int ctx->m.last_dc[0] = ctx->m.last_dc[1] = - ctx->m.last_dc[2] = 1024; + ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2); for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { unsigned mb = mb_y * ctx->m.mb_width + mb_x; int qscale = ctx->mb_qscale[mb]; @@ -515,13 +617,39 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int DNXHDEncContext *ctx = avctx->priv_data; int mb_y = jobnr, mb_x; ctx = ctx->thread[threadnr]; - for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { - unsigned mb = mb_y * ctx->m.mb_width + mb_x; - uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4); - int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize); - int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8; - ctx->mb_cmp[mb].value = varc; - ctx->mb_cmp[mb].mb = mb; + if (ctx->cid_table->bit_depth == 8) { + uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize); + for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) { + unsigned mb = mb_y * ctx->m.mb_width + mb_x; + int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize); + int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8; + ctx->mb_cmp[mb].value = varc; + ctx->mb_cmp[mb].mb = mb; + } + } else { // 10-bit + int const linesize = ctx->m.linesize >> 1; + for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) { + 
uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4); + unsigned mb = mb_y * ctx->m.mb_width + mb_x; + int sum = 0; + int sqsum = 0; + int mean, sqmean; + // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8. + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 16; ++j) { + // Turn 16-bit pixels into 10-bit ones. + int const sample = (unsigned)pix[j] >> 6; + sum += sample; + sqsum += sample * sample; + // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX + } + pix += linesize; + } + mean = sum >> 8; // 16*16 == 2^8 + sqmean = sqsum >> 8; + ctx->mb_cmp[mb].value = sqmean - mean * mean; + ctx->mb_cmp[mb].mb = mb; + } } return 0; } @@ -871,7 +999,7 @@ AVCodec ff_dnxhd_encoder = { dnxhd_encode_picture, dnxhd_encode_end, .capabilities = CODEC_CAP_SLICE_THREADS, - .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE}, + .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_YUV422P10, PIX_FMT_NONE}, .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"), .priv_class = &class, }; diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h index a968ae0223..80b6f85c89 100644 --- a/libavcodec/dnxhdenc.h +++ b/libavcodec/dnxhdenc.h @@ -52,8 +52,12 @@ typedef struct DNXHDEncContext { struct DNXHDEncContext *thread[MAX_THREADS]; + // Because our samples are either 8 or 16 bits for 8-bit and 10-bit + // encoding respectively, these refer either to bytes or to two-byte words. 
unsigned dct_y_offset; unsigned dct_uv_offset; + unsigned block_width_l2; + int interlaced; int cur_field; diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 5c5f9db47c..6f8d8a099f 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -306,25 +306,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) return s; } -static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) -{ - int i; - - /* read the pixels */ - for(i=0;i<8;i++) { - block[0] = pixels[0]; - block[1] = pixels[1]; - block[2] = pixels[2]; - block[3] = pixels[3]; - block[4] = pixels[4]; - block[5] = pixels[5]; - block[6] = pixels[6]; - block[7] = pixels[7]; - pixels += line_size; - block += 8; - } -} - static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int stride){ int i; @@ -2836,17 +2817,22 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) ff_check_alignment(); #if CONFIG_ENCODERS - if(avctx->dct_algo==FF_DCT_FASTINT) { - c->fdct = fdct_ifast; - c->fdct248 = fdct_ifast248; - } - else if(avctx->dct_algo==FF_DCT_FAAN) { - c->fdct = ff_faandct; - c->fdct248 = ff_faandct248; - } - else { - c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default - c->fdct248 = ff_fdct248_islow; + if (avctx->bits_per_raw_sample == 10) { + c->fdct = ff_jpeg_fdct_islow_10; + c->fdct248 = ff_fdct248_islow_10; + } else { + if(avctx->dct_algo==FF_DCT_FASTINT) { + c->fdct = fdct_ifast; + c->fdct248 = fdct_ifast248; + } + else if(avctx->dct_algo==FF_DCT_FAAN) { + c->fdct = ff_faandct; + c->fdct248 = ff_faandct248; + } + else { + c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default + c->fdct248 = ff_fdct248_islow_8; + } } #endif //CONFIG_ENCODERS @@ -2910,7 +2896,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) } } - c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = ff_put_pixels_clamped_c; c->put_signed_pixels_clamped = 
ff_put_signed_pixels_clamped_c; @@ -3138,13 +3123,14 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) -#define BIT_DEPTH_FUNCS(depth)\ +#define BIT_DEPTH_FUNCS(depth, dct)\ + c->get_pixels = FUNCC(get_pixels ## dct , depth);\ c->draw_edges = FUNCC(draw_edges , depth);\ c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\ - c->clear_block = FUNCC(clear_block , depth);\ - c->clear_blocks = FUNCC(clear_blocks , depth);\ - c->add_pixels8 = FUNCC(add_pixels8 , depth);\ - c->add_pixels4 = FUNCC(add_pixels4 , depth);\ + c->clear_block = FUNCC(clear_block ## dct , depth);\ + c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ + c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ + c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\ c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\ \ @@ -3178,15 +3164,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) switch (avctx->bits_per_raw_sample) { case 9: - BIT_DEPTH_FUNCS(9); + if (c->dct_bits == 32) { + BIT_DEPTH_FUNCS(9, _32); + } else { + BIT_DEPTH_FUNCS(9, _16); + } break; case 10: - BIT_DEPTH_FUNCS(10); + if (c->dct_bits == 32) { + BIT_DEPTH_FUNCS(10, _32); + } else { + BIT_DEPTH_FUNCS(10, _16); + } break; default: av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); case 8: - BIT_DEPTH_FUNCS(8); + BIT_DEPTH_FUNCS(8, _16); break; } diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index cf0fa7274a..928a516d64 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -40,8 +40,10 @@ typedef short DCTELEM; void fdct_ifast (DCTELEM *data); void fdct_ifast248 (DCTELEM *data); -void ff_jpeg_fdct_islow (DCTELEM *data); -void ff_fdct248_islow (DCTELEM *data); +void ff_jpeg_fdct_islow_8(DCTELEM *data); +void ff_jpeg_fdct_islow_10(DCTELEM *data); +void ff_fdct248_islow_8(DCTELEM *data); 
+void ff_fdct248_islow_10(DCTELEM *data); void j_rev_dct (DCTELEM *data); void j_rev_dct4 (DCTELEM *data); @@ -217,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin * DSPContext. */ typedef struct DSPContext { + /** + * Size of DCT coefficients. + */ + int dct_bits; + /* pixel ops : interface with DCT */ void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c index 17f05e6280..85d4fec7dc 100644 --- a/libavcodec/dsputil_template.c +++ b/libavcodec/dsputil_template.c @@ -192,43 +192,89 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i } } -static void FUNCC(add_pixels8)(uint8_t *restrict p_pixels, DCTELEM *p_block, int line_size) -{ - int i; - pixel *restrict pixels = (pixel *restrict)p_pixels; - dctcoef *block = (dctcoef*)p_block; - line_size >>= sizeof(pixel)-1; - - for(i=0;i<8;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels[4] += block[4]; - pixels[5] += block[5]; - pixels[6] += block[6]; - pixels[7] += block[7]; - pixels += line_size; - block += 8; - } +#define DCTELEM_FUNCS(dctcoef, suffix) \ +static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \ + const uint8_t *_pixels, \ + int line_size) \ +{ \ + const pixel *pixels = (const pixel *) _pixels; \ + dctcoef *restrict block = (dctcoef *) _block; \ + int i; \ + \ + /* read the pixels */ \ + for(i=0;i<8;i++) { \ + block[0] = pixels[0]; \ + block[1] = pixels[1]; \ + block[2] = pixels[2]; \ + block[3] = pixels[3]; \ + block[4] = pixels[4]; \ + block[5] = pixels[5]; \ + block[6] = pixels[6]; \ + block[7] = pixels[7]; \ + pixels += line_size / sizeof(pixel); \ + block += 8; \ + } \ +} \ + \ +static void FUNCC(add_pixels8 ## 
suffix)(uint8_t *restrict _pixels, \ + DCTELEM *_block, \ + int line_size) \ +{ \ + int i; \ + pixel *restrict pixels = (pixel *restrict)_pixels; \ + dctcoef *block = (dctcoef*)_block; \ + line_size /= sizeof(pixel); \ + \ + for(i=0;i<8;i++) { \ + pixels[0] += block[0]; \ + pixels[1] += block[1]; \ + pixels[2] += block[2]; \ + pixels[3] += block[3]; \ + pixels[4] += block[4]; \ + pixels[5] += block[5]; \ + pixels[6] += block[6]; \ + pixels[7] += block[7]; \ + pixels += line_size; \ + block += 8; \ + } \ +} \ + \ +static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \ + DCTELEM *_block, \ + int line_size) \ +{ \ + int i; \ + pixel *restrict pixels = (pixel *restrict)_pixels; \ + dctcoef *block = (dctcoef*)_block; \ + line_size /= sizeof(pixel); \ + \ + for(i=0;i<4;i++) { \ + pixels[0] += block[0]; \ + pixels[1] += block[1]; \ + pixels[2] += block[2]; \ + pixels[3] += block[3]; \ + pixels += line_size; \ + block += 4; \ + } \ +} \ + \ +static void FUNCC(clear_block ## suffix)(DCTELEM *block) \ +{ \ + memset(block, 0, sizeof(dctcoef)*64); \ +} \ + \ +/** \ + * memset(blocks, 0, sizeof(DCTELEM)*6*64) \ + */ \ +static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \ +{ \ + memset(blocks, 0, sizeof(dctcoef)*6*64); \ } -static void FUNCC(add_pixels4)(uint8_t *restrict p_pixels, DCTELEM *p_block, int line_size) -{ - int i; - pixel *restrict pixels = (pixel *restrict)p_pixels; - dctcoef *block = (dctcoef*)p_block; - line_size >>= sizeof(pixel)-1; - - for(i=0;i<4;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels += line_size; - block += 4; - } -} +DCTELEM_FUNCS(DCTELEM, _16) +#if BIT_DEPTH > 8 +DCTELEM_FUNCS(dctcoef, _32) +#endif #define PIXOP2(OPNAME, OP) \ static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ @@ -1232,16 +1278,3 @@ void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { FUNCC(avg_pixels16)(dst, src, stride, 
16); } -static void FUNCC(clear_block)(DCTELEM *block) -{ - memset(block, 0, sizeof(dctcoef)*64); -} - -/** - * memset(blocks, 0, sizeof(DCTELEM)*6*64) - */ -static void FUNCC(clear_blocks)(DCTELEM *blocks) -{ - memset(blocks, 0, sizeof(dctcoef)*6*64); -} - diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c index 09fa80fcb9..038aa2a234 100644 --- a/libavcodec/eac3enc.c +++ b/libavcodec/eac3enc.c @@ -63,6 +63,11 @@ void ff_eac3_get_frame_exp_strategy(AC3EncodeContext *s) { int ch; + if (s->num_blocks < 6) { + s->use_frame_exp_strategy = 0; + return; + } + s->use_frame_exp_strategy = 1; for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) { int expstr = eac3_frame_expstr_index_tab[s->exp_strategy[ch][0]-1] @@ -89,7 +94,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s) /* set first cpl coords */ for (ch = 1; ch <= s->fbw_channels; ch++) first_cpl_coords[ch] = 1; - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; for (ch = 1; ch <= s->fbw_channels; ch++) { if (block->channel_in_cpl[ch]) { @@ -104,7 +109,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s) } /* set first cpl leak */ - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 0; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; if (block->cpl_in_use) { block->new_cpl_leak = 2; @@ -130,7 +135,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s) put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */ } else { put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */ - put_bits(&s->pb, 2, 0x3); /* number of blocks = 6 */ + put_bits(&s->pb, 2, s->num_blks_code); /* number of blocks */ } put_bits(&s->pb, 3, s->channel_mode); /* audio coding mode */ put_bits(&s->pb, 1, s->lfe_on); /* LFE channel indicator */ @@ -141,11 +146,15 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s) /* TODO: mixing metadata */ put_bits(&s->pb, 1, 0); /* no info metadata */ /* TODO: info metadata */ + if 
(s->num_blocks != 6) + put_bits(&s->pb, 1, !(s->avctx->frame_number % 6)); /* converter sync flag */ put_bits(&s->pb, 1, 0); /* no additional bit stream info */ /* frame header */ + if (s->num_blocks == 6) { put_bits(&s->pb, 1, !s->use_frame_exp_strategy);/* exponent strategy syntax */ put_bits(&s->pb, 1, 0); /* aht enabled = no */ + } put_bits(&s->pb, 2, 0); /* snr offset strategy = 1 */ put_bits(&s->pb, 1, 0); /* transient pre-noise processing enabled = no */ put_bits(&s->pb, 1, 0); /* block switch syntax enabled = no */ @@ -158,7 +167,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s) /* coupling strategy use flags */ if (s->channel_mode > AC3_CHMODE_MONO) { put_bits(&s->pb, 1, s->blocks[0].cpl_in_use); - for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) { + for (blk = 1; blk < s->num_blocks; blk++) { AC3Block *block = &s->blocks[blk]; put_bits(&s->pb, 1, block->new_cpl_strategy); if (block->new_cpl_strategy) @@ -170,26 +179,31 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s) for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) put_bits(&s->pb, 5, s->frame_exp_strategy[ch]); } else { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) put_bits(&s->pb, 2, s->exp_strategy[ch][blk]); } if (s->lfe_on) { - for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) + for (blk = 0; blk < s->num_blocks; blk++) put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]); } - /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) 
*/ + /* E-AC-3 to AC-3 converter exponent strategy (not optional when num blocks == 6) */ + if (s->num_blocks != 6) { + put_bits(&s->pb, 1, 0); + } else { for (ch = 1; ch <= s->fbw_channels; ch++) { if (s->use_frame_exp_strategy) put_bits(&s->pb, 5, s->frame_exp_strategy[ch]); else put_bits(&s->pb, 5, 0); } + } /* snr offsets */ put_bits(&s->pb, 6, s->coarse_snr_offset); put_bits(&s->pb, 4, s->fine_snr_offset[1]); /* block start info */ - put_bits(&s->pb, 1, 0); + if (s->num_blocks > 1) + put_bits(&s->pb, 1, 0); } diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 1fde7455ae..83d81c95f1 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -3707,6 +3707,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); + s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; dsputil_init(&s->dsp, s->avctx); } else { av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c index 9aa584fa5c..6593e88214 100644 --- a/libavcodec/imgconvert.c +++ b/libavcodec/imgconvert.c @@ -42,9 +42,6 @@ #include "x86/dsputil_mmx.h" #endif -#define xglue(x, y) x ## y -#define glue(x, y) xglue(x, y) - #define FF_COLOR_RGB 0 /**< RGB color space */ #define FF_COLOR_GRAY 1 /**< gray color space */ #define FF_COLOR_YUV 2 /**< YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */ diff --git a/libavcodec/jfdctint.c b/libavcodec/jfdctint.c index 072c7440b5..0482bc5643 100644 --- a/libavcodec/jfdctint.c +++ b/libavcodec/jfdctint.c @@ -1,402 +1,25 @@ -/* - * jfdctint.c - * - * This file is part of the Independent JPEG Group's software. - * - * The authors make NO WARRANTY or representation, either express or implied, - * with respect to this software, its quality, accuracy, merchantability, or - * fitness for a particular purpose. 
This software is provided "AS IS", and - * you, its user, assume the entire risk as to its quality and accuracy. - * - * This software is copyright (C) 1991-1996, Thomas G. Lane. - * All Rights Reserved except as specified below. - * - * Permission is hereby granted to use, copy, modify, and distribute this - * software (or portions thereof) for any purpose, without fee, subject to - * these conditions: - * (1) If any part of the source code for this software is distributed, then - * this README file must be included, with this copyright and no-warranty - * notice unaltered; and any additions, deletions, or changes to the original - * files must be clearly indicated in accompanying documentation. - * (2) If only executable code is distributed, then the accompanying - * documentation must state that "this software is based in part on the work - * of the Independent JPEG Group". - * (3) Permission for use of this software is granted only if the user accepts - * full responsibility for any undesirable consequences; the authors accept - * NO LIABILITY for damages of any kind. - * - * These conditions apply to any software derived from or based on the IJG - * code, not just to the unmodified library. If you use our work, you ought - * to acknowledge us. - * - * Permission is NOT granted for the use of any IJG author's name or company - * name in advertising or publicity relating to this software or products - * derived from it. This software may be referred to only as "the Independent - * JPEG Group's software". - * - * We specifically permit and encourage the use of this software as the basis - * of commercial products, provided that all warranty or liability claims are - * assumed by the product vendor. - * - * This file contains a slow-but-accurate integer implementation of the - * forward DCT (Discrete Cosine Transform). - * - * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT - * on each column. 
Direct algorithms are also available, but they are - * much more complex and seem not to be any faster when reduced to code. - * - * This implementation is based on an algorithm described in - * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT - * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, - * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. - * The primary algorithm described there uses 11 multiplies and 29 adds. - * We use their alternate method with 12 multiplies and 32 adds. - * The advantage of this method is that no data path contains more than one - * multiplication; this allows a very simple and accurate implementation in - * scaled fixed-point arithmetic, with a minimal number of shifts. - */ - /** - * @file - * Independent JPEG Group's slow & accurate dct. - */ - -#include <stdlib.h> -#include <stdio.h> -#include "libavutil/common.h" -#include "dsputil.h" - -#define DCTSIZE 8 -#define BITS_IN_JSAMPLE 8 -#define GLOBAL(x) x -#define RIGHT_SHIFT(x, n) ((x) >> (n)) -#define MULTIPLY16C16(var,const) ((var)*(const)) - -#if 1 //def USE_ACCURATE_ROUNDING -#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n) -#else -#define DESCALE(x,n) RIGHT_SHIFT(x, n) -#endif - - -/* - * This module is specialized to the case DCTSIZE = 8. - */ - -#if DCTSIZE != 8 - Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ -#endif - - -/* - * The poop on this scaling stuff is as follows: - * - * Each 1-D DCT step produces outputs which are a factor of sqrt(N) - * larger than the true DCT outputs. The final outputs are therefore - * a factor of N larger than desired; since N=8 this can be cured by - * a simple right shift at the end of the algorithm. The advantage of - * this arrangement is that we save two multiplications per 1-D DCT, - * because the y0 and y4 outputs need not be divided by sqrt(N). 
- * In the IJG code, this factor of 8 is removed by the quantization step - * (in jcdctmgr.c), NOT in this module. + * This file is part of Libav. * - * We have to do addition and subtraction of the integer inputs, which - * is no problem, and multiplication by fractional constants, which is - * a problem to do in integer arithmetic. We multiply all the constants - * by CONST_SCALE and convert them to integer constants (thus retaining - * CONST_BITS bits of precision in the constants). After doing a - * multiplication we have to divide the product by CONST_SCALE, with proper - * rounding, to produce the correct output. This division can be done - * cheaply as a right shift of CONST_BITS bits. We postpone shifting - * as long as possible so that partial sums can be added together with - * full fractional precision. + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. * - * The outputs of the first pass are scaled up by PASS1_BITS bits so that - * they are represented to better-than-integral precision. These outputs - * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word - * with the recommended scaling. (For 12-bit sample data, the intermediate - * array is int32_t anyway.) + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * To avoid overflow of the 32-bit intermediate results in pass 2, we must - * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis - * shows that the values given below are the most effective. 
- */ - -#if BITS_IN_JSAMPLE == 8 -#define CONST_BITS 13 -#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ -#else -#define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ -#endif - -/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus - * causing a lot of useless floating-point operations at run time. - * To get around this we use the following pre-calculated constants. - * If you change CONST_BITS you may want to add appropriate values. - * (With a reasonable C compiler, you can just rely on the FIX() macro...) - */ - -#if CONST_BITS == 13 -#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ -#else -#define FIX_0_298631336 FIX(0.298631336) -#define FIX_0_390180644 FIX(0.390180644) -#define FIX_0_541196100 FIX(0.541196100) -#define FIX_0_765366865 FIX(0.765366865) -#define FIX_0_899976223 FIX(0.899976223) -#define FIX_1_175875602 FIX(1.175875602) -#define FIX_1_501321110 FIX(1.501321110) -#define FIX_1_847759065 FIX(1.847759065) -#define FIX_1_961570560 FIX(1.961570560) -#define FIX_2_053119869 FIX(2.053119869) -#define FIX_2_562915447 FIX(2.562915447) -#define FIX_3_072711026 FIX(3.072711026) -#endif - - -/* Multiply an int32_t variable by an int32_t constant to yield 
an int32_t result. - * For 8-bit samples with the recommended scaling, all the variable - * and constant values involved are no more than 16 bits wide, so a - * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. - * For 12-bit samples, a full 32-bit multiplication will be needed. + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) -#else -#define MULTIPLY(var,const) ((var) * (const)) -#endif - - -static av_always_inline void row_fdct(DCTELEM * data){ - int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int tmp10, tmp11, tmp12, tmp13; - int z1, z2, z3, z4, z5; - DCTELEM *dataptr; - int ctr; - - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - - dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[0] + dataptr[7]; - tmp7 = dataptr[0] - dataptr[7]; - tmp1 = dataptr[1] + dataptr[6]; - tmp6 = dataptr[1] - dataptr[6]; - tmp2 = dataptr[2] + dataptr[5]; - tmp5 = dataptr[2] - dataptr[5]; - tmp3 = dataptr[3] + dataptr[4]; - tmp4 = dataptr[3] - dataptr[4]; - - /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 
- */ - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); - dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); - - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS-PASS1_BITS); - dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS-PASS1_BITS); - - /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents cos(K*pi/16). - * i0..i3 in the paper are tmp4..tmp7 here. - */ - - z1 = tmp4 + tmp7; - z2 = tmp5 + tmp6; - z3 = tmp4 + tmp6; - z4 = tmp5 + tmp7; - z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - - tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ - tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ - tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ - tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - - z3 += z5; - z4 += z5; - - dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); - dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); - dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); - dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); - - dataptr += DCTSIZE; /* advance pointer to next row */ - } -} - -/* - * Perform the forward DCT on one block of samples. 
- */ - -GLOBAL(void) -ff_jpeg_fdct_islow (DCTELEM * data) -{ - int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int tmp10, tmp11, tmp12, tmp13; - int z1, z2, z3, z4, z5; - DCTELEM *dataptr; - int ctr; - - row_fdct(data); - - /* Pass 2: process columns. - * We remove the PASS1_BITS scaling, but leave the results scaled up - * by an overall factor of 8. - */ - - dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; - tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; - tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; - tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; - tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; - tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; - tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; - tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - - /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". - */ - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); - dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); - - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); - - /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents cos(K*pi/16). - * i0..i3 in the paper are tmp4..tmp7 here. 
- */ - - z1 = tmp4 + tmp7; - z2 = tmp5 + tmp6; - z3 = tmp4 + tmp6; - z4 = tmp5 + tmp7; - z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - - tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ - tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ - tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ - tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - - z3 += z5; - z4 += z5; - - dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, - CONST_BITS+PASS1_BITS); - - dataptr++; /* advance pointer to next column */ - } -} - -/* - * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT - * on the rows and then, instead of doing even and odd, part on the colums - * you do even part two times. - */ -GLOBAL(void) -ff_fdct248_islow (DCTELEM * data) -{ - int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int tmp10, tmp11, tmp12, tmp13; - int z1; - DCTELEM *dataptr; - int ctr; - - row_fdct(data); - - /* Pass 2: process columns. - * We remove the PASS1_BITS scaling, but leave the results scaled up - * by an overall factor of 8. 
- */ - - dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; - tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; - tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; - tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; - tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; - tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; - tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; - tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; - - tmp10 = tmp0 + tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - tmp13 = tmp0 - tmp3; - - dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); - dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); - - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); - - tmp10 = tmp4 + tmp7; - tmp11 = tmp5 + tmp6; - tmp12 = tmp5 - tmp6; - tmp13 = tmp4 - tmp7; - - dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); - dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); - - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); +#define BIT_DEPTH 8 +#include "jfdctint_template.c" +#undef BIT_DEPTH - dataptr++; /* advance pointer to next column */ - } -} +#define BIT_DEPTH 10 +#include "jfdctint_template.c" +#undef BIT_DEPTH diff --git a/libavcodec/jfdctint_template.c b/libavcodec/jfdctint_template.c new file mode 100644 index 0000000000..e60e72a412 --- /dev/null +++ b/libavcodec/jfdctint_template.c @@ -0,0 +1,405 @@ +/* + * jfdctint.c + * + * This file is part of the Independent JPEG Group's software. 
+ * + * The authors make NO WARRANTY or representation, either express or implied, + * with respect to this software, its quality, accuracy, merchantability, or + * fitness for a particular purpose. This software is provided "AS IS", and + * you, its user, assume the entire risk as to its quality and accuracy. + * + * This software is copyright (C) 1991-1996, Thomas G. Lane. + * All Rights Reserved except as specified below. + * + * Permission is hereby granted to use, copy, modify, and distribute this + * software (or portions thereof) for any purpose, without fee, subject to + * these conditions: + * (1) If any part of the source code for this software is distributed, then + * this README file must be included, with this copyright and no-warranty + * notice unaltered; and any additions, deletions, or changes to the original + * files must be clearly indicated in accompanying documentation. + * (2) If only executable code is distributed, then the accompanying + * documentation must state that "this software is based in part on the work + * of the Independent JPEG Group". + * (3) Permission for use of this software is granted only if the user accepts + * full responsibility for any undesirable consequences; the authors accept + * NO LIABILITY for damages of any kind. + * + * These conditions apply to any software derived from or based on the IJG + * code, not just to the unmodified library. If you use our work, you ought + * to acknowledge us. + * + * Permission is NOT granted for the use of any IJG author's name or company + * name in advertising or publicity relating to this software or products + * derived from it. This software may be referred to only as "the Independent + * JPEG Group's software". + * + * We specifically permit and encourage the use of this software as the basis + * of commercial products, provided that all warranty or liability claims are + * assumed by the product vendor. 
+ * + * This file contains a slow-but-accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). + * + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + */ + +/** + * @file + * Independent JPEG Group's slow & accurate dct. + */ + +#include "libavutil/common.h" +#include "dsputil.h" + +#include "bit_depth_template.c" + +#define DCTSIZE 8 +#define BITS_IN_JSAMPLE BIT_DEPTH +#define GLOBAL(x) x +#define RIGHT_SHIFT(x, n) ((x) >> (n)) +#define MULTIPLY16C16(var,const) ((var)*(const)) + +#if 1 //def USE_ACCURATE_ROUNDING +#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n) +#else +#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#endif + + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 +#error "Sorry, this code only copes with 8x8 DCTs." +#endif + + +/* + * The poop on this scaling stuff is as follows: + * + * Each 1-D DCT step produces outputs which are a factor of sqrt(N) + * larger than the true DCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. 
The advantage of + * this arrangement is that we save two multiplications per 1-D DCT, + * because the y0 and y4 outputs need not be divided by sqrt(N). + * In the IJG code, this factor of 8 is removed by the quantization step + * (in jcdctmgr.c), NOT in this module. + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. + * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (For 12-bit sample data, the intermediate + * array is int32_t anyway.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. 
+ */ + +#undef CONST_BITS +#undef PASS1_BITS +#undef OUT_SHIFT + +#if BITS_IN_JSAMPLE == 8 +#define CONST_BITS 13 +#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ +#define OUT_SHIFT PASS1_BITS +#else +#define CONST_BITS 13 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#define OUT_SHIFT (PASS1_BITS + 1) +#endif + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) + */ + +#if CONST_BITS == 13 +#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ +#else +#define FIX_0_298631336 FIX(0.298631336) +#define FIX_0_390180644 FIX(0.390180644) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_765366865 FIX(0.765366865) +#define FIX_0_899976223 FIX(0.899976223) +#define FIX_1_175875602 FIX(1.175875602) +#define FIX_1_501321110 FIX(1.501321110) +#define FIX_1_847759065 FIX(1.847759065) +#define FIX_1_961570560 FIX(1.961570560) +#define FIX_2_053119869 FIX(2.053119869) +#define FIX_2_562915447 
FIX(2.562915447) +#define FIX_3_072711026 FIX(3.072711026) +#endif + + +/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. + * For 12-bit samples, a full 32-bit multiplication will be needed. + */ + +#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2 +#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#else +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +static av_always_inline void FUNC(row_fdct)(DCTELEM *data) +{ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1, z2, z3, z4, z5; + DCTELEM *dataptr; + int ctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[0] + dataptr[7]; + tmp7 = dataptr[0] - dataptr[7]; + tmp1 = dataptr[1] + dataptr[6]; + tmp6 = dataptr[1] - dataptr[6]; + tmp2 = dataptr[2] + dataptr[5]; + tmp5 = dataptr[2] - dataptr[5]; + tmp3 = dataptr[3] + dataptr[4]; + tmp4 = dataptr[3] - dataptr[4]; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); + dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS-PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 
+ * cK represents cos(K*pi/16). + * i0..i3 in the paper are tmp4..tmp7 here. + */ + + z1 = tmp4 + tmp7; + z2 = tmp5 + tmp6; + z3 = tmp4 + tmp6; + z4 = tmp5 + tmp7; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ + + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + + z3 += z5; + z4 += z5; + + dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } +} + +/* + * Perform the forward DCT on one block of samples. + */ + +GLOBAL(void) +FUNC(ff_jpeg_fdct_islow)(DCTELEM *data) +{ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1, z2, z3, z4, z5; + DCTELEM *dataptr; + int ctr; + + FUNC(row_fdct)(data); + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. 
+ */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT); + dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS + OUT_SHIFT); + dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS + OUT_SHIFT); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * cK represents cos(K*pi/16). + * i0..i3 in the paper are tmp4..tmp7 here. 
+ */ + + z1 = tmp4 + tmp7; + z2 = tmp5 + tmp6; + z3 = tmp4 + tmp6; + z4 = tmp5 + tmp7; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ + + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + + z3 += z5; + z4 += z5; + + dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT); + dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT); + dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT); + dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT); + + dataptr++; /* advance pointer to next column */ + } +} + +/* + * The secret of DCT2-4-8 is really simple -- you do the usual 1-D DCT + * on the rows and then, instead of doing even and odd parts on the columns, + * you do the even part two times. + */ +GLOBAL(void) +FUNC(ff_fdct248_islow)(DCTELEM *data) +{ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1; + DCTELEM *dataptr; + int ctr; + + FUNC(row_fdct)(data); + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8.
+ */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; + tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; + tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + tmp13 = tmp0 - tmp3; + + dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT); + dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS+OUT_SHIFT); + dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS+OUT_SHIFT); + + tmp10 = tmp4 + tmp7; + tmp11 = tmp5 + tmp6; + tmp12 = tmp5 - tmp6; + tmp13 = tmp4 - tmp7; + + dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT); + dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS + OUT_SHIFT); + dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS + OUT_SHIFT); + + dataptr++; /* advance pointer to next column */ + } +} diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c index 1a18a8a223..b5594a9a03 100644 --- a/libavcodec/mlib/dsputil_mlib.c +++ b/libavcodec/mlib/dsputil_mlib.c @@ -421,13 +421,14 @@ static void ff_fdct_mlib(DCTELEM *data) void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->get_pixels = get_pixels_mlib; c->diff_pixels = diff_pixels_mlib; 
c->add_pixels_clamped = add_pixels_clamped_mlib; if (!high_bit_depth) { + c->get_pixels = get_pixels_mlib; + c->put_pixels_tab[0][0] = put_pixels16_mlib; c->put_pixels_tab[0][1] = put_pixels16_x2_mlib; c->put_pixels_tab[0][2] = put_pixels16_y2_mlib; diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c index b2a0187d66..b983a44855 100644 --- a/libavcodec/mpeg4videodec.c +++ b/libavcodec/mpeg4videodec.c @@ -1527,6 +1527,22 @@ static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){ return 0; } +static int mpeg4_decode_profile_level(MpegEncContext * s, GetBitContext *gb){ + int profile_and_level_indication; + + profile_and_level_indication = get_bits(gb, 8); + + s->avctx->profile = (profile_and_level_indication & 0xf0) >> 4; + s->avctx->level = (profile_and_level_indication & 0x0f); + + // for Simple profile, level 0 + if (s->avctx->profile == 0 && s->avctx->level == 8) { + s->avctx->level = 0; + } + + return 0; +} + static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ int width, height, vo_ver_id; @@ -2181,6 +2197,9 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb) else if(startcode == GOP_STARTCODE){ mpeg4_decode_gop_header(s, gb); } + else if(startcode == VOS_STARTCODE){ + mpeg4_decode_profile_level(s, gb); + } else if(startcode == VOP_STARTCODE){ break; } @@ -2241,6 +2260,25 @@ static av_cold int decode_init(AVCodecContext *avctx) return 0; } +static const AVProfile mpeg4_video_profiles[] = { + { FF_PROFILE_MPEG4_SIMPLE, "Simple Profile" }, + { FF_PROFILE_MPEG4_SIMPLE_SCALABLE, "Simple Scalable Profile" }, + { FF_PROFILE_MPEG4_CORE, "Core Profile" }, + { FF_PROFILE_MPEG4_MAIN, "Main Profile" }, + { FF_PROFILE_MPEG4_N_BIT, "N-bit Profile" }, + { FF_PROFILE_MPEG4_SCALABLE_TEXTURE, "Scalable Texture Profile" }, + { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION, "Simple Face Animation Profile" }, + { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE, "Basic Animated Texture Profile" }, + { 
FF_PROFILE_MPEG4_HYBRID, "Hybrid Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME, "Advanced Real Time Simple Profile" }, + { FF_PROFILE_MPEG4_CORE_SCALABLE, "Code Scalable Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_CODING, "Advanced Coding Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_CORE, "Advanced Core Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" }, + { FF_PROFILE_MPEG4_SIMPLE_STUDIO, "Simple Studio Profile" }, + { FF_PROFILE_MPEG4_ADVANCED_SIMPLE, "Advanced Simple Profile" }, +}; + AVCodec ff_mpeg4_decoder = { "mpeg4", AVMEDIA_TYPE_VIDEO, @@ -2255,6 +2293,7 @@ AVCodec ff_mpeg4_decoder = { .max_lowres= 3, .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"), .pix_fmts= ff_hwaccel_pixfmt_list_420, + .profiles = NULL_IF_CONFIG_SMALL(mpeg4_video_profiles), .update_thread_context= ONLY_IF_THREADS_ENABLED(ff_mpeg_update_thread_context) }; diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index af3244293c..6b2c7c76b6 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -69,7 +69,8 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6 for(qscale=qmin; qscale<=qmax; qscale++){ int i; - if (dsp->fdct == ff_jpeg_fdct_islow + if (dsp->fdct == ff_jpeg_fdct_islow_8 || + dsp->fdct == ff_jpeg_fdct_islow_10 #ifdef FAAN_POSTSCALE || dsp->fdct == ff_faandct #endif diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index bda8124216..7f36fa9aad 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -1373,7 +1373,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; c->pix_abs[0][1] = sad16_x2_altivec; c->pix_abs[0][2] = sad16_y2_altivec; @@ -1387,11 
+1387,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) c->sse[0]= sse16_altivec; c->pix_sum = pix_sum_altivec; c->diff_pixels = diff_pixels_altivec; - c->get_pixels = get_pixels_altivec; - if (!high_bit_depth) - c->clear_block = clear_block_altivec; c->add_bytes= add_bytes_altivec; if (!high_bit_depth) { + c->get_pixels = get_pixels_altivec; + c->clear_block = clear_block_altivec; c->put_pixels_tab[0][0] = put_pixels16_altivec; /* the two functions do the same thing, so use the same code */ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 8229fb5bf7..6e85241ee9 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -145,7 +145,7 @@ static void prefetch_ppc(void *mem, int stride, int h) void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; // Common optimizations whether AltiVec is available or not c->prefetch = prefetch_ppc; @@ -172,8 +172,9 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) c->gmc1 = gmc1_altivec; #if CONFIG_ENCODERS - if (avctx->dct_algo == FF_DCT_AUTO || - avctx->dct_algo == FF_DCT_ALTIVEC) { + if (avctx->bits_per_raw_sample <= 8 && + (avctx->dct_algo == FF_DCT_AUTO || + avctx->dct_algo == FF_DCT_ALTIVEC)) { c->fdct = fdct_altivec; } #endif //CONFIG_ENCODERS diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c index 9ba6bbaf2e..223971bd1a 100644 --- a/libavcodec/ppc/h264_altivec.c +++ b/libavcodec/ppc/h264_altivec.c @@ -967,7 +967,7 @@ H264_WEIGHT( 8, 8) H264_WEIGHT( 8, 4) void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (av_get_cpu_flags() & 
AV_CPU_FLAG_ALTIVEC) { if (!high_bit_depth) { diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c index 707d1c9f5e..d04a425b49 100644 --- a/libavcodec/ps2/dsputil_mmi.c +++ b/libavcodec/ps2/dsputil_mmi.c @@ -142,7 +142,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!high_bit_depth) { c->clear_blocks = clear_blocks_mmi; @@ -152,9 +152,9 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) c->put_pixels_tab[0][0] = put_pixels16_mmi; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; - } c->get_pixels = get_pixels_mmi; + } if (avctx->bits_per_raw_sample <= 8 && (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2)) { diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c index 8be9318cdb..e91893683f 100644 --- a/libavcodec/sh4/dsputil_align.c +++ b/libavcodec/sh4/dsputil_align.c @@ -333,7 +333,7 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK) void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) { - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_rnd_pixels16_o; diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c index 88502677d2..905e8b15e0 100644 --- a/libavcodec/sh4/dsputil_sh4.c +++ b/libavcodec/sh4/dsputil_sh4.c @@ -92,7 +92,7 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block) void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = 
avctx->bits_per_raw_sample > 8; dsputil_init_align(c,avctx); if (!high_bit_depth) diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c index 28061822ba..bb80cd9b44 100644 --- a/libavcodec/sparc/dsputil_vis.c +++ b/libavcodec/sparc/dsputil_vis.c @@ -3953,7 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) { /* VIS-specific optimizations */ int accel = vis_level (); - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; if (accel & ACCEL_SPARC_VIS) { if (avctx->bits_per_raw_sample <= 8 && diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c index 1256beef7f..1f2b035212 100644 --- a/libavcodec/x86/dnxhd_mmx.c +++ b/libavcodec/x86/dnxhd_mmx.c @@ -53,6 +53,7 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int l void ff_dnxhd_init_mmx(DNXHDEncContext *ctx) { if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) { - ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2; + if (ctx->cid_table->bit_depth == 8) + ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2; } } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 53ffacaf3e..6d96ab30a2 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2341,7 +2341,7 @@ void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); - const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int bit_depth = avctx->bits_per_raw_sample; if (avctx->dsp_mask) { diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index bd31205a6b..f13c1219da 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -1098,10 +1098,12 @@ static int 
ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); + int bit_depth = avctx->bits_per_raw_sample; if (mm_flags & AV_CPU_FLAG_MMX) { const int dct_algo = avctx->dct_algo; - if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ + if (avctx->bits_per_raw_sample <= 8 && + (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) { if(mm_flags & AV_CPU_FLAG_SSE2){ c->fdct = ff_fdct_sse2; }else if(mm_flags & AV_CPU_FLAG_MMX2){ @@ -1111,7 +1113,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) } } - c->get_pixels = get_pixels_mmx; + if (bit_depth <= 8) + c->get_pixels = get_pixels_mmx; c->diff_pixels = diff_pixels_mmx; c->pix_sum = pix_sum16_mmx; @@ -1158,7 +1161,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) } if(mm_flags & AV_CPU_FLAG_SSE2){ - c->get_pixels = get_pixels_sse2; + if (bit_depth <= 8) + c->get_pixels = get_pixels_sse2; c->sum_abs_dctelem= sum_abs_dctelem_sse2; #if HAVE_YASM && HAVE_ALIGNED_STACK c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2; |