diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-03-06 03:56:25 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-03-06 06:03:32 +0100 |
commit | f095391a140ed3f379e1fb16605fac821c3e6660 (patch) | |
tree | 6b0be38bffb002457cba26183c57e56d5d464551 /libavcodec | |
parent | 01606d10e600c4794d89490e731c321fb73a5141 (diff) | |
parent | 632eb1bbae473f7105e0ec6556cb040ea6d30910 (diff) | |
download | ffmpeg-f095391a140ed3f379e1fb16605fac821c3e6660.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (31 commits)
cdxl demux: do not create packets with uninitialized data at EOF.
Replace computations of remaining bits with calls to get_bits_left().
amrnb/amrwb: Remove get_bits usage.
cosmetics: reindent
avformat: do not require a pixel/sample format if there is no decoder
avformat: do not fill-in audio packet duration in compute_pkt_fields()
lavf: Use av_get_audio_frame_duration() in get_audio_frame_size()
dca_parser: parse the sample rate and frame durations
libspeexdec: do not set AVCodecContext.frame_size
libopencore-amr: do not set AVCodecContext.frame_size
alsdec: do not set AVCodecContext.frame_size
siff: do not set AVCodecContext.frame_size
amr demuxer: do not set AVCodecContext.frame_size.
aiffdec: do not set AVCodecContext.frame_size
mov: do not set AVCodecContext.frame_size
ape: do not set AVCodecContext.frame_size.
rdt: remove workaround for infinite loop with aac
avformat: do not require frame_size in avformat_find_stream_info() for CELT
avformat: do not require frame_size in avformat_find_stream_info() for MP1/2/3
avformat: do not require frame_size in avformat_find_stream_info() for AAC
...
Conflicts:
doc/APIchanges
libavcodec/Makefile
libavcodec/avcodec.h
libavcodec/h264.c
libavcodec/h264_ps.c
libavcodec/utils.c
libavcodec/version.h
libavcodec/x86/dsputil_mmx.c
libavformat/utils.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/alsdec.c | 1 | ||||
-rw-r--r-- | libavcodec/amrnbdec.c | 10 | ||||
-rw-r--r-- | libavcodec/amrwbdec.c | 10 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 20 | ||||
-rw-r--r-- | libavcodec/dca.c | 47 | ||||
-rw-r--r-- | libavcodec/dca_parser.c | 80 | ||||
-rw-r--r-- | libavcodec/dca_parser.h | 36 | ||||
-rw-r--r-- | libavcodec/escape124.c | 2 | ||||
-rw-r--r-- | libavcodec/h261dec.c | 2 | ||||
-rw-r--r-- | libavcodec/h263dec.c | 2 | ||||
-rw-r--r-- | libavcodec/h264.c | 8 | ||||
-rw-r--r-- | libavcodec/h264_ps.c | 2 | ||||
-rw-r--r-- | libavcodec/h264_sei.c | 2 | ||||
-rw-r--r-- | libavcodec/huffyuv.c | 4 | ||||
-rw-r--r-- | libavcodec/ituh263dec.c | 4 | ||||
-rw-r--r-- | libavcodec/libopencore-amr.c | 1 | ||||
-rw-r--r-- | libavcodec/libspeexdec.c | 3 | ||||
-rw-r--r-- | libavcodec/mjpegdec.c | 9 | ||||
-rw-r--r-- | libavcodec/utils.c | 176 | ||||
-rw-r--r-- | libavcodec/version.h | 2 | ||||
-rw-r--r-- | libavcodec/vp6.c | 2 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 898 |
23 files changed, 806 insertions, 518 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2e11dc89a2..e4834f3c3d 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -118,7 +118,8 @@ OBJS-$(CONFIG_CLJR_ENCODER) += cljr.o OBJS-$(CONFIG_COOK_DECODER) += cook.o OBJS-$(CONFIG_CSCD_DECODER) += cscd.o OBJS-$(CONFIG_CYUV_DECODER) += cyuv.o -OBJS-$(CONFIG_DCA_DECODER) += dca.o synth_filter.o dcadsp.o +OBJS-$(CONFIG_DCA_DECODER) += dca.o synth_filter.o dcadsp.o \ + dca_parser.o OBJS-$(CONFIG_DCA_ENCODER) += dcaenc.o OBJS-$(CONFIG_DIRAC_DECODER) += diracdec.o dirac.o diracdsp.o \ dirac_arith.o mpeg12data.o dwt.o diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c index 8abba606a8..1c6e25932d 100644 --- a/libavcodec/alsdec.c +++ b/libavcodec/alsdec.c @@ -1695,7 +1695,6 @@ static av_cold int decode_init(AVCodecContext *avctx) ctx->reverted_channels = NULL; } - avctx->frame_size = sconf->frame_length; channel_size = sconf->frame_length + sconf->max_order; ctx->prev_raw_samples = av_malloc (sizeof(*ctx->prev_raw_samples) * sconf->max_order); diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c index dc7f95ccbc..ead06ca0d4 100644 --- a/libavcodec/amrnbdec.c +++ b/libavcodec/amrnbdec.c @@ -44,7 +44,6 @@ #include <math.h> #include "avcodec.h" -#include "get_bits.h" #include "libavutil/common.h" #include "celp_math.h" #include "celp_filters.h" @@ -189,16 +188,11 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx) static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, int buf_size) { - GetBitContext gb; enum Mode mode; - init_get_bits(&gb, buf, buf_size * 8); - // Decode the first octet. - skip_bits(&gb, 1); // padding bit - mode = get_bits(&gb, 4); // frame type - p->bad_frame_indicator = !get_bits1(&gb); // quality bit - skip_bits(&gb, 2); // two padding bits + mode = buf[0] >> 3 & 0x0F; // frame type + p->bad_frame_indicator = (buf[0] & 0x4) != 0x4; // quality bit if (mode >= N_MODES || buf_size < frame_sizes_nb[mode] + 1) { return NO_DATA; diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c index 663fd0f2e9..e3570d3e63 100644 --- a/libavcodec/amrwbdec.c +++ b/libavcodec/amrwbdec.c @@ -27,7 +27,6 @@ #include "libavutil/lfg.h" #include "avcodec.h" -#include "get_bits.h" #include "lsp.h" #include "celp_math.h" #include "celp_filters.h" @@ -120,14 +119,9 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx) */ static int decode_mime_header(AMRWBContext *ctx, const uint8_t *buf) { - GetBitContext gb; - init_get_bits(&gb, buf, 8); - /* Decode frame header (1st octet) */ - skip_bits(&gb, 1); // padding bit - ctx->fr_cur_mode = get_bits(&gb, 4); - ctx->fr_quality = get_bits1(&gb); - skip_bits(&gb, 2); // padding bits + ctx->fr_cur_mode = buf[0] >> 3 & 0x0F; + ctx->fr_quality = (buf[0] & 0x4) != 0x4; return 1; } diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 397fcfb7b5..ccbcc33327 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -4128,6 +4128,26 @@ int av_get_bits_per_sample(enum CodecID codec_id); */ enum CodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be); +/** + * Return codec bits per sample. + * Only return non-zero if the bits per sample is exactly correct, not an + * approximation. + * + * @param[in] codec_id the codec + * @return Number of bits per sample or zero if unknown for the given codec. + */ +int av_get_exact_bits_per_sample(enum CodecID codec_id); + +/** + * Return audio frame duration. + * + * @param avctx codec context + * @param frame_bytes size of the frame, or 0 if unknown + * @return frame duration, in samples, if known. 0 if not able to + * determine. + */ +int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes); + /* frame parsing */ typedef struct AVCodecParserContext { void *priv_data; diff --git a/libavcodec/dca.c b/libavcodec/dca.c index 315826371c..21df38a187 100644 --- a/libavcodec/dca.c +++ b/libavcodec/dca.c @@ -38,6 +38,7 @@ #include "dcadata.h" #include "dcahuff.h" #include "dca.h" +#include "dca_parser.h" #include "synth_filter.h" #include "dcadsp.h" #include "fmtconvert.h" @@ -1382,47 +1383,6 @@ static int dca_decode_block(DCAContext *s, int base_channel, int block_index) } /** - * Convert bitstream to one representation based on sync marker - */ -static int dca_convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst, - int max_size) -{ - uint32_t mrk; - int i, tmp; - const uint16_t *ssrc = (const uint16_t *) src; - uint16_t *sdst = (uint16_t *) dst; - PutBitContext pb; - - if ((unsigned) src_size > (unsigned) max_size) { -// av_log(NULL, AV_LOG_ERROR, "Input frame size larger than DCA_MAX_FRAME_SIZE!\n"); -// return -1; - src_size = max_size; - } - - mrk = AV_RB32(src); - switch (mrk) { - case DCA_MARKER_RAW_BE: - memcpy(dst, src, src_size); - return src_size; - case DCA_MARKER_RAW_LE: - for (i = 0; i < (src_size + 1) >> 1; i++) - *sdst++ = av_bswap16(*ssrc++); - return src_size; - case DCA_MARKER_14B_BE: - case DCA_MARKER_14B_LE: - init_put_bits(&pb, dst, max_size); - for (i = 0; i < (src_size + 1) >> 1; i++, src += 2) { - tmp = ((mrk == DCA_MARKER_14B_BE) ? AV_RB16(src) : AV_RL16(src)) & 0x3FFF; - put_bits(&pb, 14, tmp); - } - flush_put_bits(&pb); - return (put_bits_count(&pb) + 7) >> 3; - default: - return AVERROR_INVALIDDATA; - } -} - -/** * Return the number of channels in an ExSS speaker mask (HD) */ static int dca_exss_mask2count(int mask) @@ -1709,8 +1669,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data, s->xch_present = 0; - s->dca_buffer_size = dca_convert_bitstream(buf, buf_size, s->dca_buffer, - DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE); + s->dca_buffer_size = ff_dca_convert_bitstream(buf, buf_size, s->dca_buffer, + DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE); if (s->dca_buffer_size == AVERROR_INVALIDDATA) { av_log(avctx, AV_LOG_ERROR, "Not a valid DCA frame\n"); return AVERROR_INVALIDDATA; @@ -1724,7 +1684,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data, //set AVCodec values with parsed data avctx->sample_rate = s->sample_rate; avctx->bit_rate = s->bit_rate; - avctx->frame_size = s->sample_blocks * 32; s->profile = FF_PROFILE_DTS; diff --git a/libavcodec/dca_parser.c b/libavcodec/dca_parser.c index 0c8ea8a07f..9a35e020fc 100644 --- a/libavcodec/dca_parser.c +++ b/libavcodec/dca_parser.c @@ -24,6 +24,10 @@ #include "parser.h" #include "dca.h" +#include "dcadata.h" +#include "dca_parser.h" +#include "get_bits.h" +#include "put_bits.h" typedef struct DCAParseContext { ParseContext pc; @@ -98,6 +102,71 @@ static av_cold int dca_parse_init(AVCodecParserContext * s) return 0; } +int ff_dca_convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst, + int max_size) +{ + uint32_t mrk; + int i, tmp; + const uint16_t *ssrc = (const uint16_t *) src; + uint16_t *sdst = (uint16_t *) dst; + PutBitContext pb; + + if ((unsigned) src_size > (unsigned) max_size) + src_size = max_size; + + mrk = AV_RB32(src); + switch (mrk) { + case DCA_MARKER_RAW_BE: + memcpy(dst, src, src_size); + return src_size; + case DCA_MARKER_RAW_LE: + for (i = 0; i < (src_size + 1) >> 1; i++) + *sdst++ = av_bswap16(*ssrc++); + return src_size; + case DCA_MARKER_14B_BE: + case DCA_MARKER_14B_LE: + init_put_bits(&pb, dst, max_size); + for (i = 0; i < (src_size + 1) >> 1; i++, src += 2) { + tmp = ((mrk == DCA_MARKER_14B_BE) ? AV_RB16(src) : AV_RL16(src)) & 0x3FFF; + put_bits(&pb, 14, tmp); + } + flush_put_bits(&pb); + return (put_bits_count(&pb) + 7) >> 3; + default: + return AVERROR_INVALIDDATA; + } +} + +static int dca_parse_params(const uint8_t *buf, int buf_size, int *duration, + int *sample_rate) +{ + GetBitContext gb; + uint8_t hdr[12 + FF_INPUT_BUFFER_PADDING_SIZE] = { 0 }; + int ret, sample_blocks, sr_code; + + if (buf_size < 12) + return AVERROR_INVALIDDATA; + + if ((ret = ff_dca_convert_bitstream(buf, 12, hdr, 12)) < 0) + return ret; + + init_get_bits(&gb, hdr, 96); + + skip_bits_long(&gb, 39); + sample_blocks = get_bits(&gb, 7) + 1; + if (sample_blocks < 8) + return AVERROR_INVALIDDATA; + *duration = 256 * (sample_blocks / 8); + + skip_bits(&gb, 20); + sr_code = get_bits(&gb, 4); + *sample_rate = dca_sample_rates[sr_code]; + if (*sample_rate == 0) + return AVERROR_INVALIDDATA; + + return 0; +} + static int dca_parse(AVCodecParserContext * s, AVCodecContext * avctx, const uint8_t ** poutbuf, int *poutbuf_size, @@ -105,7 +174,7 @@ static int dca_parse(AVCodecParserContext * s, { DCAParseContext *pc1 = s->priv_data; ParseContext *pc = &pc1->pc; - int next; + int next, duration, sample_rate; if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) { next = buf_size; @@ -118,6 +187,15 @@ static int dca_parse(AVCodecParserContext * s, return buf_size; } } + + /* read the duration and sample rate from the frame header */ + if (!dca_parse_params(buf, buf_size, &duration, &sample_rate)) { + s->duration = duration; + if (!avctx->sample_rate) + avctx->sample_rate = sample_rate; + } else + s->duration = 0; + *poutbuf = buf; *poutbuf_size = buf_size; return next; diff --git a/libavcodec/dca_parser.h b/libavcodec/dca_parser.h new file mode 100644 index 0000000000..f480eab7ce --- /dev/null +++ b/libavcodec/dca_parser.h @@ -0,0 +1,36 @@ +/* + * DCA parser + * Copyright (C) 2004 Gildas Bazin + * Copyright (C) 2004 Benjamin Zores + * Copyright (C) 2006 Benjamin Larsson + * Copyright (C) 2007 Konstantin Shishkov + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCA_PARSER_H +#define AVCODEC_DCA_PARSER_H + +#include <stdint.h> + +/** + * Convert bitstream to one representation based on sync marker + */ +int ff_dca_convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst, + int max_size); + +#endif /* AVCODEC_DCA_PARSER_H */ diff --git a/libavcodec/escape124.c b/libavcodec/escape124.c index fd76e59fb2..ca32480722 100644 --- a/libavcodec/escape124.c +++ b/libavcodec/escape124.c @@ -49,7 +49,7 @@ typedef struct Escape124Context { } Escape124Context; static int can_safely_read(GetBitContext* gb, int bits) { - return get_bits_count(gb) + bits <= gb->size_in_bits; + return get_bits_left(gb) >= bits; } /** diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c index e97c76dab4..f5ef4b4390 100644 --- a/libavcodec/h261dec.c +++ b/libavcodec/h261dec.c @@ -265,7 +265,7 @@ static int h261_decode_mb(H261Context *h){ while( h->mba_diff == MBA_STUFFING ); // stuffing if ( h->mba_diff < 0 ){ - if ( get_bits_count(&s->gb) + 7 >= s->gb.size_in_bits ) + if (get_bits_left(&s->gb) <= 7) return SLICE_END; av_log(s->avctx, AV_LOG_ERROR, "illegal mba at %d %d\n", s->mb_x, s->mb_y); diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index b5b6d8c863..9fd79b9e60 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -664,7 +664,7 @@ retry: ret = decode_slice(s); while(s->mb_y<s->mb_height){ if(s->msmpeg4_version){ - if(s->slice_height==0 || s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits) + if(s->slice_height==0 || s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_left(&s->gb)<0) break; }else{ int prev_x=s->mb_x, prev_y=s->mb_y; diff --git a/libavcodec/h264.c b/libavcodec/h264.c index caf4b8ae5f..b4cd1a4cf0 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -3687,8 +3687,8 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ if(s->mb_y >= s->mb_height){ tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); - if( get_bits_count(&s->gb) == s->gb.size_in_bits - || get_bits_count(&s->gb) < s->gb.size_in_bits && !(s->avctx->err_recognition & AV_EF_AGGRESSIVE)) { + if ( get_bits_left(&s->gb) == 0 + || get_bits_left(&s->gb) > 0 && !(s->avctx->err_recognition & AV_EF_AGGRESSIVE)) { ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask); return 0; @@ -3700,9 +3700,9 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){ } } - if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ + if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0){ tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); - if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ + if (get_bits_left(&s->gb) == 0) { ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask); if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x); diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index 5c21d80265..e56a7160e1 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -241,7 +241,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){ sps->num_reorder_frames= get_ue_golomb(&s->gb); get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/ - if(get_bits_left(&s->gb) < 0){ + if (get_bits_left(&s->gb) < 0) { sps->num_reorder_frames=0; sps->bitstream_restriction_flag= 0; } diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c index 80d70e513c..0e6fde22df 100644 --- a/libavcodec/h264_sei.c +++ b/libavcodec/h264_sei.c @@ -164,7 +164,7 @@ static int decode_buffering_period(H264Context *h){ int ff_h264_decode_sei(H264Context *h){ MpegEncContext * const s = &h->s; - while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){ + while (get_bits_left(&s->gb) > 16) { int size, type; type=0; diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c index b9ba6dd2d4..ff52eaac73 100644 --- a/libavcodec/huffyuv.c +++ b/libavcodec/huffyuv.c @@ -752,7 +752,7 @@ static void decode_422_bitstream(HYuvContext *s, int count){ count/=2; if(count >= (get_bits_left(&s->gb))/(31*4)){ - for(i=0; i<count && get_bits_count(&s->gb) < s->gb.size_in_bits; i++){ + for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { READ_2PIX(s->temp[0][2*i ], s->temp[1][i], 1); READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2); } @@ -770,7 +770,7 @@ static void decode_gray_bitstream(HYuvContext *s, int count){ count/=2; if(count >= (get_bits_left(&s->gb))/(31*2)){ - for(i=0; i<count && get_bits_count(&s->gb) < s->gb.size_in_bits; i++){ + for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { READ_2PIX(s->temp[0][2*i ], s->temp[0][2*i+1], 0); } }else{ diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c index b8224b31e6..0c3cf2fc89 100644 --- a/libavcodec/ituh263dec.c +++ b/libavcodec/ituh263dec.c @@ -854,8 +854,8 @@ end: { int v= show_bits(&s->gb, 16); - if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){ - v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits; + if (get_bits_left(&s->gb) < 16) { + v >>= 16 - get_bits_left(&s->gb); } if(v==0) diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c index 90a8c651e2..d66f749e47 100644 --- a/libavcodec/libopencore-amr.c +++ b/libavcodec/libopencore-amr.c @@ -33,7 +33,6 @@ static void amr_decode_fix_avctx(AVCodecContext *avctx) if (!avctx->channels) avctx->channels = 1; - avctx->frame_size = 160 * is_amr_wb; avctx->sample_fmt = AV_SAMPLE_FMT_S16; } diff --git a/libavcodec/libspeexdec.c b/libavcodec/libspeexdec.c index fdc39532b7..7c36208bf1 100644 --- a/libavcodec/libspeexdec.c +++ b/libavcodec/libspeexdec.c @@ -54,9 +54,6 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx) if (s->header) { avctx->sample_rate = s->header->rate; avctx->channels = s->header->nb_channels; - avctx->frame_size = s->frame_size = s->header->frame_size; - if (s->header->frames_per_packet) - avctx->frame_size *= s->header->frames_per_packet; mode = speex_lib_get_mode(s->header->mode); if (!mode) { diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index c5db0f1d41..499ed4ca0e 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -985,9 +985,9 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, if (s->restart_interval && !s->restart_count) s->restart_count = s->restart_interval; - if (get_bits_count(&s->gb)>s->gb.size_in_bits) { + if (get_bits_left(&s->gb) < 0) { av_log(s->avctx, AV_LOG_ERROR, "overread %d\n", - get_bits_count(&s->gb) - s->gb.size_in_bits); + -get_bits_left(&s->gb)); return -1; } for (i = 0; i < nb_components; i++) { @@ -1270,7 +1270,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) len = get_bits(&s->gb, 16); if (len < 5) return -1; - if (8 * len + get_bits_count(&s->gb) > s->gb.size_in_bits) + if (8 * len > get_bits_left(&s->gb)) return -1; id = get_bits_long(&s->gb, 32); @@ -1408,8 +1408,7 @@ out: static int mjpeg_decode_com(MJpegDecodeContext *s) { int len = get_bits(&s->gb, 16); - if (len >= 2 && - 8 * len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) { + if (len >= 2 && 8 * len - 16 <= get_bits_left(&s->gb)) { char *cbuf = av_malloc(len - 1); if (cbuf) { int i; diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 6c49905e65..f8c98d0ce1 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -1838,21 +1838,15 @@ void avcodec_default_free_buffers(AVCodecContext *avctx) } } -int av_get_bits_per_sample(enum CodecID codec_id){ +int av_get_exact_bits_per_sample(enum CodecID codec_id) +{ switch(codec_id){ - case CODEC_ID_ADPCM_SBPRO_2: - return 2; - case CODEC_ID_ADPCM_SBPRO_3: - return 3; - case CODEC_ID_ADPCM_SBPRO_4: case CODEC_ID_ADPCM_CT: case CODEC_ID_ADPCM_IMA_APC: - case CODEC_ID_ADPCM_IMA_WAV: - case CODEC_ID_ADPCM_IMA_QT: - case CODEC_ID_ADPCM_SWF: - case CODEC_ID_ADPCM_MS: - case CODEC_ID_ADPCM_YAMAHA: + case CODEC_ID_ADPCM_IMA_EA_SEAD: + case CODEC_ID_ADPCM_IMA_WS: case CODEC_ID_ADPCM_G722: + case CODEC_ID_ADPCM_YAMAHA: return 4; case CODEC_ID_PCM_ALAW: case CODEC_ID_PCM_MULAW: @@ -1908,6 +1902,166 @@ enum CodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be) return map[fmt][be]; } +int av_get_bits_per_sample(enum CodecID codec_id) +{ + switch (codec_id) { + case CODEC_ID_ADPCM_SBPRO_2: + return 2; + case CODEC_ID_ADPCM_SBPRO_3: + return 3; + case CODEC_ID_ADPCM_SBPRO_4: + case CODEC_ID_ADPCM_IMA_WAV: + case CODEC_ID_ADPCM_IMA_QT: + case CODEC_ID_ADPCM_SWF: + case CODEC_ID_ADPCM_MS: + return 4; + default: + return av_get_exact_bits_per_sample(codec_id); + } +} + +int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes) +{ + int id, sr, ch, ba, tag, bps; + + id = avctx->codec_id; + sr = avctx->sample_rate; + ch = avctx->channels; + ba = avctx->block_align; + tag = avctx->codec_tag; + bps = av_get_exact_bits_per_sample(avctx->codec_id); + + /* codecs with an exact constant bits per sample */ + if (bps > 0 && ch > 0 && frame_bytes > 0) + return (frame_bytes * 8) / (bps * ch); + bps = avctx->bits_per_coded_sample; + + /* codecs with a fixed packet duration */ + switch (id) { + case CODEC_ID_ADPCM_ADX: return 32; + case CODEC_ID_ADPCM_IMA_QT: return 64; + case CODEC_ID_ADPCM_EA_XAS: return 128; + case CODEC_ID_AMR_NB: + case CODEC_ID_GSM: + case CODEC_ID_QCELP: + case CODEC_ID_RA_144: + case CODEC_ID_RA_288: return 160; + case CODEC_ID_IMC: return 256; + case CODEC_ID_AMR_WB: + case CODEC_ID_GSM_MS: return 320; + case CODEC_ID_MP1: return 384; + case CODEC_ID_ATRAC1: return 512; + case CODEC_ID_ATRAC3: return 1024; + case CODEC_ID_MP2: + case CODEC_ID_MUSEPACK7: return 1152; + case CODEC_ID_AC3: return 1536; + } + + if (sr > 0) { + /* calc from sample rate */ + if (id == CODEC_ID_TTA) + return 256 * sr / 245; + + if (ch > 0) { + /* calc from sample rate and channels */ + if (id == CODEC_ID_BINKAUDIO_DCT) + return (480 << (sr / 22050)) / ch; + } + } + + if (ba > 0) { + /* calc from block_align */ + if (id == CODEC_ID_SIPR) { + switch (ba) { + case 20: return 160; + case 19: return 144; + case 29: return 288; + case 37: return 480; + } + } + } + + if (frame_bytes > 0) { + /* calc from frame_bytes only */ + if (id == CODEC_ID_TRUESPEECH) + return 240 * (frame_bytes / 32); + if (id == CODEC_ID_NELLYMOSER) + return 256 * (frame_bytes / 64); + + if (bps > 0) { + /* calc from frame_bytes and bits_per_coded_sample */ + if (id == CODEC_ID_ADPCM_G726) + return frame_bytes * 8 / bps; + } + + if (ch > 0) { + /* calc from frame_bytes and channels */ + switch (id) { + case CODEC_ID_ADPCM_4XM: + case CODEC_ID_ADPCM_IMA_ISS: + return (frame_bytes - 4 * ch) * 2 / ch; + case CODEC_ID_ADPCM_IMA_SMJPEG: + return (frame_bytes - 4) * 2 / ch; + case CODEC_ID_ADPCM_IMA_AMV: + return (frame_bytes - 8) * 2 / ch; + case CODEC_ID_ADPCM_XA: + return (frame_bytes / 128) * 224 / ch; + case CODEC_ID_INTERPLAY_DPCM: + return (frame_bytes - 6 - ch) / ch; + case CODEC_ID_ROQ_DPCM: + return (frame_bytes - 8) / ch; + case CODEC_ID_XAN_DPCM: + return (frame_bytes - 2 * ch) / ch; + case CODEC_ID_MACE3: + return 3 * frame_bytes / ch; + case CODEC_ID_MACE6: + return 6 * frame_bytes / ch; + case CODEC_ID_PCM_LXF: + return 2 * (frame_bytes / (5 * ch)); + } + + if (tag) { + /* calc from frame_bytes, channels, and codec_tag */ + if (id == CODEC_ID_SOL_DPCM) { + if (tag == 3) + return frame_bytes / ch; + else + return frame_bytes * 2 / ch; + } + } + + if (ba > 0) { + /* calc from frame_bytes, channels, and block_align */ + int blocks = frame_bytes / ba; + switch (avctx->codec_id) { + case CODEC_ID_ADPCM_IMA_WAV: + return blocks * (1 + (ba - 4 * ch) / (4 * ch) * 8); + case CODEC_ID_ADPCM_IMA_DK3: + return blocks * (((ba - 16) * 8 / 3) / ch); + case CODEC_ID_ADPCM_IMA_DK4: + return blocks * (1 + (ba - 4 * ch) * 2 / ch); + case CODEC_ID_ADPCM_MS: + return blocks * (2 + (ba - 7 * ch) * 2 / ch); + } + } + + if (bps > 0) { + /* calc from frame_bytes, channels, and bits_per_coded_sample */ + switch (avctx->codec_id) { + case CODEC_ID_PCM_DVD: + return 2 * (frame_bytes / ((bps * 2 / 8) * ch)); + case CODEC_ID_PCM_BLURAY: + return frame_bytes / ((FFALIGN(ch, 2) * bps) / 8); + case CODEC_ID_S302M: + return 2 * (frame_bytes / ((bps + 4) / 4)) / ch; + } + } + } + } + + return 0; +} + #if !HAVE_THREADS int ff_thread_init(AVCodecContext *s){ return -1; diff --git a/libavcodec/version.h b/libavcodec/version.h index 72d5b2e075..95fbd00450 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -21,7 +21,7 @@ #define AVCODEC_VERSION_H #define LIBAVCODEC_VERSION_MAJOR 54 -#define LIBAVCODEC_VERSION_MINOR 9 +#define LIBAVCODEC_VERSION_MINOR 10 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c index 8b975029d8..5d7efea224 100644 --- a/libavcodec/vp6.c +++ b/libavcodec/vp6.c @@ -387,7 +387,7 @@ static void vp6_parse_coeff_huffman(VP56Context *s) if (coeff_idx) break; } else { - if (get_bits_count(&s->gb) >= s->gb.size_in_bits) + if (get_bits_left(&s->gb) <= 0) return; coeff = get_vlc2(&s->gb, vlc_coeff->table, 9, 3); if (coeff == 0) { diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 8b976f7800..976008af9b 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2415,12 +2415,470 @@ extern void ff_butterflies_float_interleave_sse(float *dst, const float *src0, extern void ff_butterflies_float_interleave_avx(float *dst, const float *src0, const float *src1, int len); -void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) +#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ + c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU + +#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ + c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ + c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU + +#define H264_QPEL_FUNCS(x, y, CPU) \ + c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU; \ + c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU; \ + c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU; \ + c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU + +#define H264_QPEL_FUNCS_10(x, y, CPU) \ + c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU; \ + c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU; \ + c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU; \ + c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU; + +static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) +{ + const int high_bit_depth = avctx->bits_per_raw_sample > 8; + + c->put_pixels_clamped = ff_put_pixels_clamped_mmx; + c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; + c->add_pixels_clamped = ff_add_pixels_clamped_mmx; + + if (!high_bit_depth) { + c->clear_block = clear_block_mmx; + c->clear_blocks = clear_blocks_mmx; + c->draw_edges = draw_edges_mmx; + + SET_HPEL_FUNCS(put, 0, 16, mmx); + SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx); + SET_HPEL_FUNCS(avg, 0, 16, mmx); + SET_HPEL_FUNCS(avg_no_rnd, 0, 16, mmx); + SET_HPEL_FUNCS(put, 1, 8, mmx); + SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx); + SET_HPEL_FUNCS(avg, 1, 8, mmx); + SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); + } + +#if ARCH_X86_32 || !HAVE_YASM + c->gmc= gmc_mmx; +#endif +#if ARCH_X86_32 && HAVE_YASM + if (!high_bit_depth) + c->emulated_edge_mc = emulated_edge_mc_mmx; +#endif + + c->add_bytes = add_bytes_mmx; + + c->put_no_rnd_pixels_l2[0]= put_vp_no_rnd_pixels16_l2_mmx; + c->put_no_rnd_pixels_l2[1]= put_vp_no_rnd_pixels8_l2_mmx; + + if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { + c->h263_v_loop_filter = h263_v_loop_filter_mmx; + c->h263_h_loop_filter = h263_h_loop_filter_mmx; + } + +#if HAVE_YASM + if (!high_bit_depth && CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmx_rnd; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; + } + + c->vector_clip_int32 = ff_vector_clip_int32_mmx; +#endif + +} + +static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, + int mm_flags) +{ + const int bit_depth = avctx->bits_per_raw_sample; + const int high_bit_depth = bit_depth > 8; + + c->prefetch = prefetch_mmx2; + + if (!high_bit_depth) { + c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; + c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; + + c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; + + c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; + c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; + + c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + } + + if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { + if (!high_bit_depth) { + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; + + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + } + + if (CONFIG_VP3_DECODER && HAVE_YASM) { + c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2; + c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2; + } + } + if (CONFIG_VP3_DECODER && HAVE_YASM) { + c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; + } + + if (CONFIG_VP3_DECODER + && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; + } + + SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, ); + + if (!high_bit_depth) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, ); + } else if (bit_depth == 10) { +#if HAVE_YASM +#if !ARCH_X86_64 + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); +#endif + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); +#endif + } + + SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); + +#if HAVE_YASM + if (!high_bit_depth && CONFIG_H264CHROMA) { + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_mmx2_rnd; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmx2; + c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmx2; + c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmx2; + } + if (bit_depth == 10 && CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext; + c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext; + } + + c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; + + c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; + c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; + + if (avctx->flags & CODEC_FLAG_BITEXACT) { + c->apply_window_int16 = ff_apply_window_int16_mmxext_ba; + } else { + c->apply_window_int16 = ff_apply_window_int16_mmxext; + } +#endif +} + +static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, + int mm_flags) { - int mm_flags = av_get_cpu_flags(); const int high_bit_depth = avctx->bits_per_raw_sample > 8; + + c->prefetch = prefetch_3dnow; + + if (!high_bit_depth) { + c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; + c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; + + c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; + + c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; + c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; + + c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; + + if (!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; + + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; + } + } + + if (CONFIG_VP3_DECODER + && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; + } + + SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, ); + + if (!high_bit_depth) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, ); + } + + SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); + +#if HAVE_YASM + if (!high_bit_depth && CONFIG_H264CHROMA) { + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; + } +#endif + + c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; + c->vector_fmul = vector_fmul_3dnow; + c->vector_fmul_add = vector_fmul_add_3dnow; + +#if HAVE_7REGS + c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; +#endif +} + +static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, + int mm_flags) +{ + c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; +#if HAVE_6REGS + c->vector_fmul_window = vector_fmul_window_3dnow2; +#endif +} + +static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) +{ + const int high_bit_depth = avctx->bits_per_raw_sample > 8; + + if (!high_bit_depth) { + if (!(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){ + /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ + c->clear_block = clear_block_sse; + c->clear_blocks = clear_blocks_sse; + } + } + + c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; + c->ac3_downmix = ac3_downmix_sse; + c->vector_fmul = vector_fmul_sse; + c->vector_fmul_reverse = vector_fmul_reverse_sse; + + if (!(mm_flags & AV_CPU_FLAG_3DNOW)) + c->vector_fmul_add = vector_fmul_add_sse; + +#if HAVE_6REGS + c->vector_fmul_window = vector_fmul_window_sse; +#endif + + c->vector_clipf = vector_clipf_sse; + +#if HAVE_YASM + c->scalarproduct_float = ff_scalarproduct_float_sse; + c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; + + if (!high_bit_depth) + c->emulated_edge_mc = emulated_edge_mc_sse; + c->gmc = gmc_sse; +#endif +} + +static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, + int mm_flags) +{ + const int bit_depth = avctx->bits_per_raw_sample; + const int high_bit_depth = bit_depth > 8; + + if (mm_flags & AV_CPU_FLAG_3DNOW) { + // these functions are slower than mmx on AMD, but faster on Intel + if (!high_bit_depth) { + c->put_pixels_tab[0][0] = put_pixels16_sse2; + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; + c->avg_pixels_tab[0][0] = avg_pixels16_sse2; + H264_QPEL_FUNCS(0, 0, sse2); + } + } + + if (!high_bit_depth) { + H264_QPEL_FUNCS(0, 1, sse2); + H264_QPEL_FUNCS(0, 2, sse2); + H264_QPEL_FUNCS(0, 3, sse2); + H264_QPEL_FUNCS(1, 1, sse2); + H264_QPEL_FUNCS(1, 2, sse2); + H264_QPEL_FUNCS(1, 3, sse2); + H264_QPEL_FUNCS(2, 1, sse2); + H264_QPEL_FUNCS(2, 2, sse2); + H264_QPEL_FUNCS(2, 3, sse2); + H264_QPEL_FUNCS(3, 1, sse2); + H264_QPEL_FUNCS(3, 2, sse2); + H264_QPEL_FUNCS(3, 3, sse2); + } + +#if HAVE_YASM + if (bit_depth == 10) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); + H264_QPEL_FUNCS_10(1, 0, sse2_cache64); + H264_QPEL_FUNCS_10(2, 0, sse2_cache64); + H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + + if (CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; + } + } + + c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; + c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; + if (mm_flags & AV_CPU_FLAG_ATOM) { + c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; + } else { + c->vector_clip_int32 = ff_vector_clip_int32_sse2; + } + if (avctx->flags & CODEC_FLAG_BITEXACT) { + c->apply_window_int16 = ff_apply_window_int16_sse2_ba; + } else if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { + c->apply_window_int16 = ff_apply_window_int16_sse2; + } + c->bswap_buf = ff_bswap32_buf_sse2; +#endif +} + +static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, + int mm_flags) +{ +#if HAVE_SSSE3 + const int high_bit_depth = avctx->bits_per_raw_sample > 8; + const int bit_depth = avctx->bits_per_raw_sample; + + if (!high_bit_depth) { + H264_QPEL_FUNCS(1, 0, ssse3); + H264_QPEL_FUNCS(1, 1, ssse3); + H264_QPEL_FUNCS(1, 2, ssse3); + H264_QPEL_FUNCS(1, 3, ssse3); + H264_QPEL_FUNCS(2, 0, ssse3); + H264_QPEL_FUNCS(2, 1, ssse3); + H264_QPEL_FUNCS(2, 2, ssse3); + H264_QPEL_FUNCS(2, 3, ssse3); + H264_QPEL_FUNCS(3, 0, ssse3); + H264_QPEL_FUNCS(3, 1, ssse3); + H264_QPEL_FUNCS(3, 2, ssse3); + H264_QPEL_FUNCS(3, 3, ssse3); + } +#if HAVE_YASM + else if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); + } + if (!high_bit_depth && CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_ssse3_rnd; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_ssse3_rnd; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3; + } + c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; + if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe + c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; + + if (mm_flags & AV_CPU_FLAG_ATOM) { + c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; + } else { + c->apply_window_int16 = ff_apply_window_int16_ssse3; + } + if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) { // cachesplit + c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; + } + c->bswap_buf = ff_bswap32_buf_ssse3; +#endif +#endif +} + +static void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, + int mm_flags) +{ +#if HAVE_YASM + c->vector_clip_int32 = ff_vector_clip_int32_sse4; +#endif +} + +static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) +{ +#if HAVE_AVX && HAVE_YASM const int bit_depth = avctx->bits_per_raw_sample; + if (bit_depth == 10) { + // AVX implies !cache64. + // TODO: Port cache(32|64) detection from x264. + H264_QPEL_FUNCS_10(1, 0, sse2); + H264_QPEL_FUNCS_10(2, 0, sse2); + H264_QPEL_FUNCS_10(3, 0, sse2); + + if (CONFIG_H264CHROMA) { + c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; + c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; + } + } + c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; +#endif +} + +void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) +{ + int mm_flags = av_get_cpu_flags(); + if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) mm_flags |= (avctx->dsp_mask & 0xffff); @@ -2498,432 +2956,32 @@ void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } - c->put_pixels_clamped = ff_put_pixels_clamped_mmx; - c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; - c->add_pixels_clamped = ff_add_pixels_clamped_mmx; - if (!high_bit_depth) { - c->clear_block = clear_block_mmx; - c->clear_blocks = clear_blocks_mmx; - if ((mm_flags & AV_CPU_FLAG_SSE) && - !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){ - /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ - c->clear_block = clear_block_sse; - c->clear_blocks = clear_blocks_sse; - } - } - -#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ - c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ - c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU - - if (!high_bit_depth) { - SET_HPEL_FUNCS(put, 0, 16, mmx); - SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx); - SET_HPEL_FUNCS(avg, 0, 16, mmx); - SET_HPEL_FUNCS(avg_no_rnd, 0, 16, mmx); - SET_HPEL_FUNCS(put, 1, 8, mmx); - SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx); - SET_HPEL_FUNCS(avg, 1, 8, mmx); - SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); - } - -#if ARCH_X86_32 || !HAVE_YASM - c->gmc= gmc_mmx; -#endif -#if ARCH_X86_32 && HAVE_YASM - if (!high_bit_depth) - c->emulated_edge_mc = emulated_edge_mc_mmx; -#endif - - c->add_bytes= add_bytes_mmx; - - if (!high_bit_depth) - c->draw_edges = draw_edges_mmx; - - c->put_no_rnd_pixels_l2[0]= put_vp_no_rnd_pixels16_l2_mmx; - c->put_no_rnd_pixels_l2[1]= put_vp_no_rnd_pixels8_l2_mmx; - - if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { - c->h263_v_loop_filter= h263_v_loop_filter_mmx; - c->h263_h_loop_filter= h263_h_loop_filter_mmx; - } - -#if HAVE_YASM - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; - c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; - } - - c->vector_clip_int32 = ff_vector_clip_int32_mmx; -#endif - - if (mm_flags & AV_CPU_FLAG_MMX2) { - c->prefetch = prefetch_mmx2; - - if (!high_bit_depth) { - c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; - c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; - - c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; - - c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; - c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; - - c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; - c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; - c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; - } - - if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - if (!high_bit_depth) { - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; - } - - if (CONFIG_VP3_DECODER && HAVE_YASM) { - c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; - c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2; - } - } - if (CONFIG_VP3_DECODER && HAVE_YASM) { - c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; - } - - if (CONFIG_VP3_DECODER - && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; - } - -#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ - c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU - - SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, ); - - if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, ); - } - else if (bit_depth == 10) { -#if HAVE_YASM -#if !ARCH_X86_64 - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); -#endif - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); -#endif - } - - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); - -#if HAVE_YASM - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; - c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; - c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; - c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; - } - if (bit_depth == 10 && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_10_mmxext; - c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_10_mmxext; - c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_10_mmxext; - c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_10_mmxext; - } - - c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; -#endif -#if HAVE_7REGS - if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) - c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; -#endif - } else if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) { - c->prefetch = prefetch_3dnow; - - if (!high_bit_depth) { - c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; - c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; - - c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; - - c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; - c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; - - c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; - c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; - c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; - - if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; - } - } + dsputil_init_mmx(c, avctx, mm_flags); + } - if (CONFIG_VP3_DECODER - && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; - } + if (mm_flags & AV_CPU_FLAG_MMX2) + dsputil_init_mmx2(c, avctx, mm_flags); - SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, ); - - if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, ); - } + if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) + dsputil_init_3dnow(c, avctx, mm_flags); - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); + if (HAVE_AMD3DNOWEXT && (mm_flags & AV_CPU_FLAG_3DNOWEXT)) + dsputil_init_3dnow2(c, avctx, mm_flags); -#if HAVE_YASM - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; - c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; - } + if (HAVE_SSE && (mm_flags & AV_CPU_FLAG_SSE)) + dsputil_init_sse(c, avctx, mm_flags); -#endif - } - - -#define H264_QPEL_FUNCS(x, y, CPU)\ - c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ - c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ - c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ - c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; - if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){ - // these functions are slower than mmx on AMD, but faster on Intel - if (!high_bit_depth) { - c->put_pixels_tab[0][0] = put_pixels16_sse2; - c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; - c->avg_pixels_tab[0][0] = avg_pixels16_sse2; - H264_QPEL_FUNCS(0, 0, sse2); - } - } - if(mm_flags & AV_CPU_FLAG_SSE2){ - if (!high_bit_depth) { - H264_QPEL_FUNCS(0, 1, sse2); - H264_QPEL_FUNCS(0, 2, sse2); - H264_QPEL_FUNCS(0, 3, sse2); - H264_QPEL_FUNCS(1, 1, sse2); - H264_QPEL_FUNCS(1, 2, sse2); - H264_QPEL_FUNCS(1, 3, sse2); - H264_QPEL_FUNCS(2, 1, sse2); - H264_QPEL_FUNCS(2, 2, sse2); - H264_QPEL_FUNCS(2, 3, sse2); - H264_QPEL_FUNCS(3, 1, sse2); - H264_QPEL_FUNCS(3, 2, sse2); - H264_QPEL_FUNCS(3, 3, sse2); - } -#if HAVE_YASM -#define H264_QPEL_FUNCS_10(x, y, CPU)\ - c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\ - c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\ - c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\ - c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU; - if (bit_depth == 10) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); - H264_QPEL_FUNCS_10(1, 0, sse2_cache64) - H264_QPEL_FUNCS_10(2, 0, sse2_cache64) - H264_QPEL_FUNCS_10(3, 0, sse2_cache64) - - if (CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; - } - } -#endif - } -#if HAVE_SSSE3 - if(mm_flags & AV_CPU_FLAG_SSSE3){ - if (!high_bit_depth) { - H264_QPEL_FUNCS(1, 0, ssse3); - H264_QPEL_FUNCS(1, 1, ssse3); - H264_QPEL_FUNCS(1, 2, ssse3); - H264_QPEL_FUNCS(1, 3, ssse3); - H264_QPEL_FUNCS(2, 0, ssse3); - H264_QPEL_FUNCS(2, 1, ssse3); - H264_QPEL_FUNCS(2, 2, ssse3); - H264_QPEL_FUNCS(2, 3, ssse3); - H264_QPEL_FUNCS(3, 0, ssse3); - H264_QPEL_FUNCS(3, 1, ssse3); - H264_QPEL_FUNCS(3, 2, ssse3); - H264_QPEL_FUNCS(3, 3, ssse3); - } -#if HAVE_YASM - else if (bit_depth == 10) { - H264_QPEL_FUNCS_10(1, 0, ssse3_cache64) - H264_QPEL_FUNCS_10(2, 0, ssse3_cache64) - H264_QPEL_FUNCS_10(3, 0, ssse3_cache64) - } - if (!high_bit_depth && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; - c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; - c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; - c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3; - } - c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; - if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe - c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; -#endif - } -#endif - - if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) { - c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; - c->vector_fmul = vector_fmul_3dnow; - } - if (HAVE_AMD3DNOWEXT && (mm_flags & AV_CPU_FLAG_3DNOWEXT)) { - c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; -#if HAVE_6REGS - c->vector_fmul_window = vector_fmul_window_3dnow2; -#endif - } - if(mm_flags & AV_CPU_FLAG_MMX2){ -#if HAVE_YASM - c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; - if (avctx->flags & CODEC_FLAG_BITEXACT) { - c->apply_window_int16 = ff_apply_window_int16_mmxext_ba; - } else { - c->apply_window_int16 = ff_apply_window_int16_mmxext; - } -#endif - } - if(mm_flags & AV_CPU_FLAG_SSE){ - c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; - c->ac3_downmix = ac3_downmix_sse; - c->vector_fmul = vector_fmul_sse; - c->vector_fmul_reverse = vector_fmul_reverse_sse; - c->vector_fmul_add = vector_fmul_add_sse; -#if HAVE_6REGS - c->vector_fmul_window = vector_fmul_window_sse; -#endif - c->vector_clipf = vector_clipf_sse; -#if HAVE_YASM - c->scalarproduct_float = ff_scalarproduct_float_sse; - c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; + if (mm_flags & AV_CPU_FLAG_SSE2) + dsputil_init_sse2(c, avctx, mm_flags); - if (!high_bit_depth) - c->emulated_edge_mc = emulated_edge_mc_sse; - c->gmc = gmc_sse; -#endif - } - if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) - c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse - if(mm_flags & AV_CPU_FLAG_SSE2){ -#if HAVE_YASM - c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; - if (mm_flags & AV_CPU_FLAG_ATOM) { - c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; - } else { - c->vector_clip_int32 = ff_vector_clip_int32_sse2; - } - if (avctx->flags & CODEC_FLAG_BITEXACT) { - c->apply_window_int16 = ff_apply_window_int16_sse2_ba; - } else { - if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { - c->apply_window_int16 = ff_apply_window_int16_sse2; - } - } - c->bswap_buf = ff_bswap32_buf_sse2; -#endif - } - if (mm_flags & AV_CPU_FLAG_SSSE3) { -#if HAVE_YASM - if (mm_flags & AV_CPU_FLAG_ATOM) { - c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; - } else { - c->apply_window_int16 = ff_apply_window_int16_ssse3; - } - if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) { // cachesplit - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; - } - c->bswap_buf = ff_bswap32_buf_ssse3; -#endif - } + if (mm_flags & AV_CPU_FLAG_SSSE3) + dsputil_init_ssse3(c, avctx, mm_flags); - if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { -#if HAVE_YASM - c->vector_clip_int32 = ff_vector_clip_int32_sse4; -#endif - } + if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) + dsputil_init_sse4(c, avctx, mm_flags); -#if HAVE_AVX && HAVE_YASM - if (mm_flags & AV_CPU_FLAG_AVX) { - if (bit_depth == 10) { - //AVX implies !cache64. - //TODO: Port cache(32|64) detection from x264. - H264_QPEL_FUNCS_10(1, 0, sse2) - H264_QPEL_FUNCS_10(2, 0, sse2) - H264_QPEL_FUNCS_10(3, 0, sse2) - - if (CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx; - c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx; - } - } - c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; - } -#endif - } + if (mm_flags & AV_CPU_FLAG_AVX) + dsputil_init_avx(c, avctx, mm_flags); if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx); |