aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2011-05-05 03:09:48 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2011-05-05 03:30:24 +0200
commit b000b86e1dd03c4ff89cd63a6fa88fc280947c94 (patch)
tree 8ba961dc8c013885d7bdfe944fb7cb31d5dc6d95 /libavcodec
parent 9a5624a0f1b205e966391645a512c6dccdce42cd (diff)
parent af1ca249e8eb685823dd0dade3aa3c1d119a61ec (diff)
download ffmpeg-b000b86e1dd03c4ff89cd63a6fa88fc280947c94.tar.gz
Merge remote branch 'qatar/master'
* qatar/master: (23 commits)
  doc: Check standalone compilation before submitting new components.
  Fix standalone compilation of pipe protocol.
  Fix standalone compilation of ac3_fixed encoder.
  Fix standalone compilation of binkaudio_dct / binkaudio_rdft decoders.
  Fix standalone compilation of IMC decoder.
  Fix standalone compilation of WTV demuxer.
  Fix standalone compilation of MXPEG decoder.
  flashsv: K&R cosmetics
  matroskaenc: fix memory leak
  vc1: make overlap filter for I-frames bit-exact.
  vc1dec: use s->start/end_mb_y instead of passing them as function args.
  Revert "VC1: merge idct8x8, coeff adjustments and put_pixels."
  Replace strncpy() with av_strlcpy().
  indeo3: Eliminate use of long.
  get_bits: make cache unsigned to eliminate undefined signed overflow.
  asfdec: fix assert failure on invalid files
  avfilter: check malloc return values.
  Not pulled as reason for reindent is not pulled: mpegvideo: reindent.
  nutenc: check malloc return values.
  Not pulled due to much simpler solution in ffmpeg *: don't av_malloc(0).
  ...

Conflicts:
  doc/developer.texi
  libavcodec/Makefile
  libavcodec/get_bits.h
  libavcodec/mpegvideo.c
  libavformat/Makefile
  libavutil/log.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/Makefile 3
-rw-r--r-- libavcodec/ac3enc.c 43
-rw-r--r-- libavcodec/ass.c 4
-rw-r--r-- libavcodec/flashsv.c 4
-rw-r--r-- libavcodec/get_bits.h 2
-rw-r--r-- libavcodec/indeo3.c 22
-rw-r--r-- libavcodec/ppc/vc1dsp_altivec.c 63
-rw-r--r-- libavcodec/vc1.c 28
-rw-r--r-- libavcodec/vc1.h 2
-rw-r--r-- libavcodec/vc1dec.c 382
-rw-r--r-- libavcodec/vc1dsp.c 108
-rw-r--r-- libavcodec/vc1dsp.h 12
12 files changed, 398 insertions, 275 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 6c75b6510b..a4b6f9a406 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -277,7 +277,7 @@ OBJS-$(CONFIG_MSRLE_DECODER) += msrle.o msrledec.o
OBJS-$(CONFIG_MSVIDEO1_DECODER) += msvideo1.o
OBJS-$(CONFIG_MSVIDEO1_ENCODER) += msvideo1enc.o elbg.o
OBJS-$(CONFIG_MSZH_DECODER) += lcldec.o
-OBJS-$(CONFIG_MXPEG_DECODER) += mxpegdec.o
+OBJS-$(CONFIG_MXPEG_DECODER) += mxpegdec.o mjpegdec.o mjpeg.o
OBJS-$(CONFIG_NELLYMOSER_DECODER) += nellymoserdec.o nellymoser.o
OBJS-$(CONFIG_NELLYMOSER_ENCODER) += nellymoserenc.o nellymoser.o
OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o
@@ -556,6 +556,7 @@ OBJS-$(CONFIG_SPDIF_DEMUXER) += aacadtsdec.o mpeg4audio.o
OBJS-$(CONFIG_WEBM_MUXER) += xiph.o mpeg4audio.o \
flacdec.o flacdata.o flac.o \
mpegaudiodata.o vorbis_data.o
+OBJS-$(CONFIG_WTV_DEMUXER) += mpeg4audio.o mpegaudiodata.o
# external codec libraries
OBJS-$(CONFIG_LIBCELT_DECODER) += libcelt_dec.o
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 48df4b7e6b..90d65bf44d 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -33,6 +33,7 @@
#include "libavutil/audioconvert.h"
#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
#include "libavutil/crc.h"
#include "libavutil/opt.h"
#include "avcodec.h"
@@ -1551,10 +1552,10 @@ static void dprint_options(AVCodecContext *avctx)
char strbuf[32];
switch (s->bitstream_id) {
- case 6: strncpy(strbuf, "AC-3 (alt syntax)", 32); break;
- case 8: strncpy(strbuf, "AC-3 (standard)", 32); break;
- case 9: strncpy(strbuf, "AC-3 (dnet half-rate)", 32); break;
- case 10: strncpy(strbuf, "AC-3 (dnet quater-rate", 32); break;
+ case 6: av_strlcpy(strbuf, "AC-3 (alt syntax)", 32); break;
+ case 8: av_strlcpy(strbuf, "AC-3 (standard)", 32); break;
+ case 9: av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32); break;
+ case 10: av_strlcpy(strbuf, "AC-3 (dnet quater-rate", 32); break;
default: snprintf(strbuf, 32, "ERROR");
}
av_dlog(avctx, "bitstream_id: %s (%d)\n", strbuf, s->bitstream_id);
@@ -1581,9 +1582,9 @@ static void dprint_options(AVCodecContext *avctx)
if (opt->audio_production_info) {
av_dlog(avctx, "mixing_level: %ddB\n", opt->mixing_level);
switch (opt->room_type) {
- case 0: strncpy(strbuf, "notindicated", 32); break;
- case 1: strncpy(strbuf, "large", 32); break;
- case 2: strncpy(strbuf, "small", 32); break;
+ case 0: av_strlcpy(strbuf, "notindicated", 32); break;
+ case 1: av_strlcpy(strbuf, "large", 32); break;
+ case 2: av_strlcpy(strbuf, "small", 32); break;
default: snprintf(strbuf, 32, "ERROR (%d)", opt->room_type);
}
av_dlog(avctx, "room_type: %s\n", strbuf);
@@ -1595,9 +1596,9 @@ static void dprint_options(AVCodecContext *avctx)
av_dlog(avctx, "dialnorm: %ddB\n", opt->dialogue_level);
if (s->channel_mode == AC3_CHMODE_STEREO) {
switch (opt->dolby_surround_mode) {
- case 0: strncpy(strbuf, "notindicated", 32); break;
- case 1: strncpy(strbuf, "on", 32); break;
- case 2: strncpy(strbuf, "off", 32); break;
+ case 0: av_strlcpy(strbuf, "notindicated", 32); break;
+ case 1: av_strlcpy(strbuf, "on", 32); break;
+ case 2: av_strlcpy(strbuf, "off", 32); break;
default: snprintf(strbuf, 32, "ERROR (%d)", opt->dolby_surround_mode);
}
av_dlog(avctx, "dsur_mode: %s\n", strbuf);
@@ -1609,9 +1610,9 @@ static void dprint_options(AVCodecContext *avctx)
if (s->bitstream_id == 6) {
if (opt->extended_bsi_1) {
switch (opt->preferred_stereo_downmix) {
- case 0: strncpy(strbuf, "notindicated", 32); break;
- case 1: strncpy(strbuf, "ltrt", 32); break;
- case 2: strncpy(strbuf, "loro", 32); break;
+ case 0: av_strlcpy(strbuf, "notindicated", 32); break;
+ case 1: av_strlcpy(strbuf, "ltrt", 32); break;
+ case 2: av_strlcpy(strbuf, "loro", 32); break;
default: snprintf(strbuf, 32, "ERROR (%d)", opt->preferred_stereo_downmix);
}
av_dlog(avctx, "dmix_mode: %s\n", strbuf);
@@ -1628,23 +1629,23 @@ static void dprint_options(AVCodecContext *avctx)
}
if (opt->extended_bsi_2) {
switch (opt->dolby_surround_ex_mode) {
- case 0: strncpy(strbuf, "notindicated", 32); break;
- case 1: strncpy(strbuf, "on", 32); break;
- case 2: strncpy(strbuf, "off", 32); break;
+ case 0: av_strlcpy(strbuf, "notindicated", 32); break;
+ case 1: av_strlcpy(strbuf, "on", 32); break;
+ case 2: av_strlcpy(strbuf, "off", 32); break;
default: snprintf(strbuf, 32, "ERROR (%d)", opt->dolby_surround_ex_mode);
}
av_dlog(avctx, "dsurex_mode: %s\n", strbuf);
switch (opt->dolby_headphone_mode) {
- case 0: strncpy(strbuf, "notindicated", 32); break;
- case 1: strncpy(strbuf, "on", 32); break;
- case 2: strncpy(strbuf, "off", 32); break;
+ case 0: av_strlcpy(strbuf, "notindicated", 32); break;
+ case 1: av_strlcpy(strbuf, "on", 32); break;
+ case 2: av_strlcpy(strbuf, "off", 32); break;
default: snprintf(strbuf, 32, "ERROR (%d)", opt->dolby_headphone_mode);
}
av_dlog(avctx, "dheadphone_mode: %s\n", strbuf);
switch (opt->ad_converter_type) {
- case 0: strncpy(strbuf, "standard", 32); break;
- case 1: strncpy(strbuf, "hdcd", 32); break;
+ case 0: av_strlcpy(strbuf, "standard", 32); break;
+ case 1: av_strlcpy(strbuf, "hdcd", 32); break;
default: snprintf(strbuf, 32, "ERROR (%d)", opt->ad_converter_type);
}
av_dlog(avctx, "ad_conv_type: %s\n", strbuf);
diff --git a/libavcodec/ass.c b/libavcodec/ass.c
index a23567cba4..cb0babf2c8 100644
--- a/libavcodec/ass.c
+++ b/libavcodec/ass.c
@@ -21,6 +21,7 @@
#include "avcodec.h"
#include "ass.h"
+#include "libavutil/avstring.h"
int ff_ass_subtitle_header(AVCodecContext *avctx,
const char *font, int font_size,
@@ -97,8 +98,7 @@ int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
rects[sub->num_rects]->type = SUBTITLE_ASS;
rects[sub->num_rects]->ass = av_malloc(len + dlen + 1);
strcpy (rects[sub->num_rects]->ass , header);
- strncpy(rects[sub->num_rects]->ass + len, dialog, dlen);
- rects[sub->num_rects]->ass[len+dlen] = 0;
+ av_strlcpy(rects[sub->num_rects]->ass + len, dialog, dlen + 1);
sub->num_rects++;
return dlen;
}
diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index 12aabe6004..18327ad462 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -148,7 +148,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
if ((avctx->width != s->image_width) || (avctx->height != s->image_height)) {
av_log(avctx, AV_LOG_ERROR, "Frame width or height differs from first frames!\n");
av_log(avctx, AV_LOG_ERROR, "fh = %d, fv %d vs ch = %d, cv = %d\n", avctx->height,
- avctx->width,s->image_height, s->image_width);
+ avctx->width, s->image_height, s->image_width);
return -1;
}
@@ -195,7 +195,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
s->zstream.next_in = buf + (get_bits_count(&gb) / 8);
s->zstream.avail_in = size;
s->zstream.next_out = s->tmpblock;
- s->zstream.avail_out = s->block_size*3;
+ s->zstream.avail_out = s->block_size * 3;
ret = inflate(&(s->zstream), Z_FINISH);
if (ret == Z_DATA_ERROR) {
av_log(avctx, AV_LOG_ERROR, "Zlib resync occurred\n");
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 2f1a851853..8579c87cd1 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -127,7 +127,7 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# define OPEN_READER(name, gb) \
unsigned int name##_index = (gb)->index; \
- av_unused int name##_cache
+ av_unused unsigned int name##_cache
# define CLOSE_READER(name, gb) (gb)->index = name##_index
diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index c9d8573692..14526a239c 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -149,13 +149,13 @@ static av_cold void iv_free_func(Indeo3DecodeContext *s)
}
struct ustr {
- long xpos;
- long ypos;
- long width;
- long height;
- long split_flag;
- long split_direction;
- long usl7;
+ int xpos;
+ int ypos;
+ int width;
+ int height;
+ int split_flag;
+ int split_direction;
+ int usl7;
};
@@ -203,12 +203,12 @@ struct ustr {
static void iv_Decode_Chunk(Indeo3DecodeContext *s,
uint8_t *cur, uint8_t *ref, int width, int height,
- const uint8_t *buf1, long cb_offset, const uint8_t *hdr,
+ const uint8_t *buf1, int cb_offset, const uint8_t *hdr,
const uint8_t *buf2, int min_width_160)
{
uint8_t bit_buf;
- unsigned long bit_pos, lv, lv1, lv2;
- long *width_tbl, width_tbl_arr[10];
+ unsigned int bit_pos, lv, lv1, lv2;
+ int *width_tbl, width_tbl_arr[10];
const signed char *ref_vectors;
uint8_t *cur_frm_pos, *ref_frm_pos, *cp, *cp2;
uint32_t *cur_lp, *ref_lp;
@@ -982,7 +982,7 @@ static int iv_decode_frame(AVCodecContext *avctx,
Indeo3DecodeContext *s = avctx->priv_data;
unsigned int image_width, image_height,
chroma_width, chroma_height;
- unsigned long flags, cb_offset, data_size,
+ unsigned int flags, cb_offset, data_size,
y_offset, v_offset, u_offset, mc_vector_count;
const uint8_t *hdr_pos, *buf_pos;
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index 05edb53b7c..69670619da 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -130,8 +130,7 @@ do { \
/** Do inverse transform on 8x8 block
*/
-static void vc1_inv_trans_8x8_altivec(DCTELEM block[64],
- int sign, int rangered)
+static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
{
vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
@@ -145,9 +144,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64],
const vector unsigned int vec_2 = vec_splat_u32(2);
const vector signed int vec_1s = vec_splat_s32(1);
const vector unsigned int vec_1 = vec_splat_u32(1);
- const vector unsigned short rangered_shift = vec_splat_u16(1);
- const vector signed short signed_bias = vec_sl(vec_splat_s16(4),
- vec_splat_u16(4));
src0 = vec_ld( 0, block);
src1 = vec_ld( 16, block);
@@ -217,27 +213,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64],
src6 = vec_pack(sE, s6);
src7 = vec_pack(sF, s7);
- if (rangered) {
- if (!sign) {
- src0 = vec_sub(src0, signed_bias);
- src1 = vec_sub(src1, signed_bias);
- src2 = vec_sub(src2, signed_bias);
- src3 = vec_sub(src3, signed_bias);
- src4 = vec_sub(src4, signed_bias);
- src5 = vec_sub(src5, signed_bias);
- src6 = vec_sub(src6, signed_bias);
- src7 = vec_sub(src7, signed_bias);
- }
- src0 = vec_sl(src0, rangered_shift);
- src1 = vec_sl(src1, rangered_shift);
- src2 = vec_sl(src2, rangered_shift);
- src3 = vec_sl(src3, rangered_shift);
- src4 = vec_sl(src4, rangered_shift);
- src5 = vec_sl(src5, rangered_shift);
- src6 = vec_sl(src6, rangered_shift);
- src7 = vec_sl(src7, rangered_shift);
- }
-
vec_st(src0, 0, block);
vec_st(src1, 16, block);
vec_st(src2, 32, block);
@@ -248,36 +223,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64],
vec_st(src7,112, block);
}
-static void vc1_inv_trans_8x8_add_altivec(uint8_t *dest, int stride, DCTELEM *b)
-{
- vc1_inv_trans_8x8_altivec(b, 0, 0);
- ff_add_pixels_clamped_c(b, dest, stride);
-}
-
-static void vc1_inv_trans_8x8_put_signed_altivec(uint8_t *dest, int stride, DCTELEM *b)
-{
- vc1_inv_trans_8x8_altivec(b, 1, 0);
- ff_put_signed_pixels_clamped_c(b, dest, stride);
-}
-
-static void vc1_inv_trans_8x8_put_signed_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b)
-{
- vc1_inv_trans_8x8_altivec(b, 1, 1);
- ff_put_signed_pixels_clamped_c(b, dest, stride);
-}
-
-static void vc1_inv_trans_8x8_put_altivec(uint8_t *dest, int stride, DCTELEM *b)
-{
- vc1_inv_trans_8x8_altivec(b, 0, 0);
- ff_put_pixels_clamped_c(b, dest, stride);
-}
-
-static void vc1_inv_trans_8x8_put_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b)
-{
- vc1_inv_trans_8x8_altivec(b, 0, 1);
- ff_put_pixels_clamped_c(b, dest, stride);
-}
-
/** Do inverse transform on 8x4 part of block
*/
static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
@@ -396,11 +341,7 @@ void ff_vc1dsp_init_altivec(VC1DSPContext* dsp)
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
return;
- dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_altivec;
- dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_altivec;
- dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_altivec;
- dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_altivec;
- dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_altivec;
+ dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec;
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec;
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index ef44a34f50..e062a35cc1 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -280,28 +280,6 @@ static int vop_dquant_decoding(VC1Context *v)
static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
-static void simple_idct_put_rangered(uint8_t *dest, int line_size, DCTELEM *block)
-{
- int i;
- ff_simple_idct(block);
- for (i = 0; i < 64; i++) block[i] = (block[i] - 64) << 1;
- ff_put_pixels_clamped_c(block, dest, line_size);
-}
-
-static void simple_idct_put_signed(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_simple_idct(block);
- ff_put_signed_pixels_clamped_c(block, dest, line_size);
-}
-
-static void simple_idct_put_signed_rangered(uint8_t *dest, int line_size, DCTELEM *block)
-{
- int i;
- ff_simple_idct(block);
- for (i = 0; i < 64; i++) block[i] <<= 1;
- ff_put_signed_pixels_clamped_c(block, dest, line_size);
-}
-
/**
* Decode Simple/Main Profiles sequence header
* @see Figure 7-8, p16-17
@@ -359,11 +337,7 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
v->res_fasttx = get_bits1(gb);
if (!v->res_fasttx)
{
- v->vc1dsp.vc1_inv_trans_8x8_add = ff_simple_idct_add;
- v->vc1dsp.vc1_inv_trans_8x8_put[0] = ff_simple_idct_put;
- v->vc1dsp.vc1_inv_trans_8x8_put[1] = simple_idct_put_rangered;
- v->vc1dsp.vc1_inv_trans_8x8_put_signed[0] = simple_idct_put_signed;
- v->vc1dsp.vc1_inv_trans_8x8_put_signed[1] = simple_idct_put_signed_rangered;
+ v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index 19be3c3452..6d4c0aa7a3 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -317,6 +317,8 @@ typedef struct VC1Context{
int bi_type;
int x8_type;
+ DCTELEM (*block)[6][64];
+ int n_allocated_blks, cur_blk_idx, left_blk_idx, topleft_blk_idx, top_blk_idx;
uint32_t *cbp_base, *cbp;
uint8_t *is_intra_base, *is_intra;
int16_t (*luma_mv_base)[2], (*luma_mv)[2];
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 125994d8b9..d35e664aca 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -160,6 +160,72 @@ enum Imode {
/** @} */ //Bitplane group
+static void vc1_put_signed_blocks_clamped(VC1Context *v)
+{
+ MpegEncContext *s = &v->s;
+
+ /* The put pixels loop is always one MB row behind the decoding loop,
+ * because we can only put pixels when overlap filtering is done, and
+ * for filtering of the bottom edge of a MB, we need the next MB row
+ * present as well.
+ * Within the row, the put pixels loop is also one MB col behind the
+ * decoding loop. The reason for this is again, because for filtering
+ * of the right MB edge, we need the next MB present. */
+ if (!s->first_slice_line) {
+ if (s->mb_x) {
+ s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
+ s->dest[0] - 16 * s->linesize - 16,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
+ s->dest[0] - 16 * s->linesize - 8,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
+ s->dest[0] - 8 * s->linesize - 16,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
+ s->dest[0] - 8 * s->linesize - 8,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
+ s->dest[1] - 8 * s->uvlinesize - 8,
+ s->uvlinesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
+ s->dest[2] - 8 * s->uvlinesize - 8,
+ s->uvlinesize);
+ }
+ if (s->mb_x == s->mb_width - 1) {
+ s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
+ s->dest[0] - 16 * s->linesize,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
+ s->dest[0] - 16 * s->linesize + 8,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
+ s->dest[0] - 8 * s->linesize,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
+ s->dest[0] - 8 * s->linesize + 8,
+ s->linesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
+ s->dest[1] - 8 * s->uvlinesize,
+ s->uvlinesize);
+ s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
+ s->dest[2] - 8 * s->uvlinesize,
+ s->uvlinesize);
+ }
+ }
+
+#define inc_blk_idx(idx) do { \
+ idx++; \
+ if (idx >= v->n_allocated_blks) \
+ idx = 0; \
+ } while (0)
+
+ inc_blk_idx(v->topleft_blk_idx);
+ inc_blk_idx(v->top_blk_idx);
+ inc_blk_idx(v->left_blk_idx);
+ inc_blk_idx(v->cur_blk_idx);
+}
+
static void vc1_loop_filter_iblk(VC1Context *v, int pq)
{
MpegEncContext *s = &v->s;
@@ -187,6 +253,151 @@ static void vc1_loop_filter_iblk(VC1Context *v, int pq)
}
}
+static void vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
+{
+ MpegEncContext *s = &v->s;
+ int j;
+
+ /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
+ * means it runs two rows/cols behind the decoding loop. */
+ if (!s->first_slice_line) {
+ if (s->mb_x) {
+ if (s->mb_y >= s->start_mb_y + 2) {
+ v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
+
+ if (s->mb_x >= 2)
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
+ for(j = 0; j < 2; j++) {
+ v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
+ if (s->mb_x >= 2) {
+ v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
+ }
+ }
+ }
+ v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
+ }
+
+ if (s->mb_x == s->mb_width - 1) {
+ if (s->mb_y >= s->start_mb_y + 2) {
+ v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
+
+ if (s->mb_x)
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
+ for(j = 0; j < 2; j++) {
+ v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize, s->uvlinesize, pq);
+ if (s->mb_x >= 2) {
+ v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 16 * s->uvlinesize, s->uvlinesize, pq);
+ }
+ }
+ }
+ v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
+ }
+
+ if (s->mb_y == s->mb_height) {
+ if (s->mb_x) {
+ if (s->mb_x >= 2)
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
+ if (s->mb_x >= 2) {
+ for(j = 0; j < 2; j++) {
+ v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
+ }
+ }
+ }
+
+ if (s->mb_x == s->mb_width - 1) {
+ if (s->mb_x)
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
+ v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
+ if (s->mb_x) {
+ for(j = 0; j < 2; j++) {
+ v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1] - 8 * s->uvlinesize, s->uvlinesize, pq);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void vc1_smooth_overlap_filter_iblk(VC1Context *v)
+{
+ MpegEncContext *s = &v->s;
+ int mb_pos;
+
+ if (v->condover == CONDOVER_NONE)
+ return;
+
+ mb_pos = s->mb_x + s->mb_y * s->mb_stride;
+
+ /* Within a MB, the horizontal overlap always runs before the vertical.
+ * To accomplish that, we run the H on left and internal borders of the
+ * currently decoded MB. Then, we wait for the next overlap iteration
+ * to do H overlap on the right edge of this MB, before moving over and
+ * running the V overlap. Therefore, the V overlap makes us trail by one
+ * MB col and the H overlap filter makes us trail by one MB row. This
+ * is reflected in the time at which we run the put_pixels loop. */
+ if(v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
+ if(s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
+ v->over_flags_plane[mb_pos - 1])) {
+ v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
+ v->block[v->cur_blk_idx][0]);
+ v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
+ v->block[v->cur_blk_idx][2]);
+ if(!(s->flags & CODEC_FLAG_GRAY)) {
+ v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
+ v->block[v->cur_blk_idx][4]);
+ v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
+ v->block[v->cur_blk_idx][5]);
+ }
+ }
+ v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
+ v->block[v->cur_blk_idx][1]);
+ v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
+ v->block[v->cur_blk_idx][3]);
+
+ if (s->mb_x == s->mb_width - 1) {
+ if(!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
+ v->over_flags_plane[mb_pos - s->mb_stride])) {
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
+ v->block[v->cur_blk_idx][0]);
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
+ v->block[v->cur_blk_idx][1]);
+ if(!(s->flags & CODEC_FLAG_GRAY)) {
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
+ v->block[v->cur_blk_idx][4]);
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
+ v->block[v->cur_blk_idx][5]);
+ }
+ }
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
+ v->block[v->cur_blk_idx][2]);
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
+ v->block[v->cur_blk_idx][3]);
+ }
+ }
+ if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
+ if(!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
+ v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
+ v->block[v->left_blk_idx][0]);
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
+ v->block[v->left_blk_idx][1]);
+ if(!(s->flags & CODEC_FLAG_GRAY)) {
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
+ v->block[v->left_blk_idx][4]);
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
+ v->block[v->left_blk_idx][5]);
+ }
+ }
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
+ v->block[v->left_blk_idx][2]);
+ v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
+ v->block[v->left_blk_idx][3]);
+ }
+}
+
/** Do motion compensation over 1 macroblock
* Mostly adapted hpel_motion and qpel_motion from mpegvideo.c
*/
@@ -2016,7 +2227,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
if(i==1)
v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
else{
- v->vc1dsp.vc1_inv_trans_8x8_add(dst, linesize, block);
+ v->vc1dsp.vc1_inv_trans_8x8(block);
+ s->dsp.add_pixels_clamped(block, dst, linesize);
}
}
break;
@@ -2258,7 +2470,7 @@ static int vc1_decode_p_mb(VC1Context *v)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
- int i;
+ int i, j;
int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
@@ -2288,8 +2500,6 @@ static int vc1_decode_p_mb(VC1Context *v)
{
if (!skipped)
{
- vc1_idct_func idct8x8_fn;
-
GET_MVDATA(dmv_x, dmv_y);
if (s->mb_intra) {
@@ -2324,7 +2534,6 @@ static int vc1_decode_p_mb(VC1Context *v)
VC1_TTMB_VLC_BITS, 2);
if(!s->mb_intra) vc1_mc_1mv(v, 0);
dst_idx = 0;
- idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
for (i=0; i<6; i++)
{
s->dc_val[0][s->block_index[i]] = 0;
@@ -2342,9 +2551,9 @@ static int vc1_decode_p_mb(VC1Context *v)
vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
- idct8x8_fn(s->dest[dst_idx] + off,
- i & 4 ? s->uvlinesize : s->linesize,
- s->block[i]);
+ v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+ if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
+ s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
if(v->pq >= 9 && v->overlap) {
if(v->c_avail)
v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
@@ -2380,7 +2589,6 @@ static int vc1_decode_p_mb(VC1Context *v)
{
int intra_count = 0, coded_inter = 0;
int is_intra[6], is_coded[6];
- vc1_idct_func idct8x8_fn;
/* Get CBPCY */
cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
for (i=0; i<6; i++)
@@ -2431,7 +2639,6 @@ static int vc1_decode_p_mb(VC1Context *v)
}
if (!v->ttmbf && coded_inter)
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
- idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
for (i=0; i<6; i++)
{
dst_idx += i >> 2;
@@ -2447,9 +2654,9 @@ static int vc1_decode_p_mb(VC1Context *v)
vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset);
if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
- idct8x8_fn(s->dest[dst_idx] + off,
- (i&4)?s->uvlinesize:s->linesize,
- s->block[i]);
+ v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+ if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
+ s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
if(v->pq >= 9 && v->overlap) {
if(v->c_avail)
v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
@@ -2497,7 +2704,7 @@ static void vc1_decode_b_mb(VC1Context *v)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
- int i;
+ int i, j;
int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
@@ -2510,7 +2717,6 @@ static void vc1_decode_b_mb(VC1Context *v)
int skipped, direct;
int dmv_x[2], dmv_y[2];
int bmvtype = BMV_TYPE_BACKWARD;
- vc1_idct_func idct8x8_fn;
mquant = v->pq; /* Loosy initialization */
s->mb_intra = 0;
@@ -2608,7 +2814,6 @@ static void vc1_decode_b_mb(VC1Context *v)
}
}
dst_idx = 0;
- idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
for (i=0; i<6; i++)
{
s->dc_val[0][s->block_index[i]] = 0;
@@ -2626,9 +2831,9 @@ static void vc1_decode_b_mb(VC1Context *v)
vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
- idct8x8_fn(s->dest[dst_idx] + off,
- i & 4 ? s->uvlinesize : s->linesize,
- s->block[i]);
+ v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+ if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
+ s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
} else if(val) {
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), NULL);
if(!v->ttmbf && ttmb < 8) ttmb = -1;
@@ -2641,12 +2846,11 @@ static void vc1_decode_b_mb(VC1Context *v)
*/
static void vc1_decode_i_blocks(VC1Context *v)
{
- int k;
+ int k, j;
MpegEncContext *s = &v->s;
int cbp, val;
uint8_t *coded_val;
int mb_pos;
- vc1_idct_func idct8x8_fn;
/* select codingmode used for VLC tables selection */
switch(v->y_ac_table_index){
@@ -2681,10 +2885,6 @@ static void vc1_decode_i_blocks(VC1Context *v)
s->mb_x = s->mb_y = 0;
s->mb_intra = 1;
s->first_slice_line = 1;
- if(v->pq >= 9 && v->overlap) {
- idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
- } else
- idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm];
for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
s->mb_x = 0;
ff_init_block_index(s);
@@ -2721,9 +2921,14 @@ static void vc1_decode_i_blocks(VC1Context *v)
vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2);
if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue;
- idct8x8_fn(dst[k],
- k & 4 ? s->uvlinesize : s->linesize,
- s->block[k]);
+ v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
+ if(v->pq >= 9 && v->overlap) {
+ if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] <<= 1;
+ s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize);
+ } else {
+ if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] = (s->block[k][j] - 64) << 1;
+ s->dsp.put_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize);
+ }
}
if(v->pq >= 9 && v->overlap) {
@@ -2770,7 +2975,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
/** Decode blocks of I-frame for advanced profile
*/
-static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end)
+static void vc1_decode_i_blocks_adv(VC1Context *v)
{
int k;
MpegEncContext *s = &v->s;
@@ -2779,9 +2984,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end)
int mb_pos;
int mquant = v->pq;
int mqdiff;
- int overlap;
GetBitContext *gb = &s->gb;
- vc1_idct_func idct8x8_fn;
/* select codingmode used for VLC tables selection */
switch(v->y_ac_table_index){
@@ -2812,27 +3015,20 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end)
s->mb_x = s->mb_y = 0;
s->mb_intra = 1;
s->first_slice_line = 1;
- s->mb_y = mby_start;
- if (mby_start) {
+ s->mb_y = s->start_mb_y;
+ if (s->start_mb_y) {
s->mb_x = 0;
ff_init_block_index(s);
memset(&s->coded_block[s->block_index[0]-s->b8_stride], 0,
s->b8_stride * sizeof(*s->coded_block));
}
- idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[0];
- for(; s->mb_y < mby_end; s->mb_y++) {
+ for(; s->mb_y < s->end_mb_y; s->mb_y++) {
s->mb_x = 0;
ff_init_block_index(s);
for(;s->mb_x < s->mb_width; s->mb_x++) {
- uint8_t *dst[6];
+ DCTELEM (*block)[64] = v->block[v->cur_blk_idx];
ff_update_block_index(s);
- dst[0] = s->dest[0];
- dst[1] = dst[0] + 8;
- dst[2] = s->dest[0] + s->linesize * 8;
- dst[3] = dst[2] + 8;
- dst[4] = s->dest[1];
- dst[5] = s->dest[2];
- s->dsp.clear_blocks(s->block[0]);
+ s->dsp.clear_blocks(block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_stride;
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
@@ -2845,13 +3041,8 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end)
else
v->s.ac_pred = v->acpred_plane[mb_pos];
- if(v->condover == CONDOVER_SELECT) {
- if(v->overflg_is_raw)
- overlap = get_bits1(&v->s.gb);
- else
- overlap = v->over_flags_plane[mb_pos];
- } else
- overlap = (v->condover == CONDOVER_ALL);
+ if (v->condover == CONDOVER_SELECT && v->overflg_is_raw)
+ v->over_flags_plane[mb_pos] = get_bits1(&v->s.gb);
GET_MQUANT();
@@ -2873,40 +3064,18 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end)
v->a_avail = !s->first_slice_line || (k==2 || k==3);
v->c_avail = !!s->mb_x || (k==1 || k==3);
- vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
+ vc1_decode_i_block_adv(v, block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue;
- idct8x8_fn(dst[k],
- k & 4 ? s->uvlinesize : s->linesize,
- s->block[k]);
+ v->vc1dsp.vc1_inv_trans_8x8(block[k]);
}
- if(overlap) {
- if(s->mb_x) {
- v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
- v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
- if(!(s->flags & CODEC_FLAG_GRAY)) {
- v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
- v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
- }
- }
- v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
- v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
- if(!s->first_slice_line) {
- v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
- v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
- if(!(s->flags & CODEC_FLAG_GRAY)) {
- v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
- v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
- }
- }
- v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
- v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
- }
- if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
+ vc1_smooth_overlap_filter_iblk(v);
+ vc1_put_signed_blocks_clamped(v);
+ if(v->s.loop_filter) vc1_loop_filter_iblk_delayed(v, v->pq);
if(get_bits_count(&s->gb) > v->bits) {
- ff_er_add_slice(s, 0, mby_start, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
+ ff_er_add_slice(s, 0, s->start_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i\n", get_bits_count(&s->gb), v->bits);
return;
}
@@ -2917,12 +3086,21 @@ static void vc1_decode_i_blocks_adv(VC1Context *v, int mby_start, int mby_end)
ff_draw_horiz_band(s, (s->mb_y-1) * 16, 16);
s->first_slice_line = 0;
}
+
+ /* raw bottom MB row */
+ s->mb_x = 0;
+ ff_init_block_index(s);
+ for(;s->mb_x < s->mb_width; s->mb_x++) {
+ ff_update_block_index(s);
+ vc1_put_signed_blocks_clamped(v);
+ if(v->s.loop_filter) vc1_loop_filter_iblk_delayed(v, v->pq);
+ }
if (v->s.loop_filter)
ff_draw_horiz_band(s, (s->mb_height-1)*16, 16);
- ff_er_add_slice(s, 0, mby_start, s->mb_width - 1, mby_end - 1, (AC_END|DC_END|MV_END));
+ ff_er_add_slice(s, 0, s->start_mb_y, s->mb_width - 1, s->end_mb_y - 1, (AC_END|DC_END|MV_END));
}
-static void vc1_decode_p_blocks(VC1Context *v, int mby_start, int mby_end)
+static void vc1_decode_p_blocks(VC1Context *v)
{
MpegEncContext *s = &v->s;
int apply_loop_filter;
@@ -2955,17 +3133,17 @@ static void vc1_decode_p_blocks(VC1Context *v, int mby_start, int mby_end)
apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY);
s->first_slice_line = 1;
memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride);
- for(s->mb_y = mby_start; s->mb_y < mby_end; s->mb_y++) {
+ for(s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
s->mb_x = 0;
ff_init_block_index(s);
for(; s->mb_x < s->mb_width; s->mb_x++) {
ff_update_block_index(s);
vc1_decode_p_mb(v);
- if (s->mb_y != mby_start && apply_loop_filter)
+ if (s->mb_y != s->start_mb_y && apply_loop_filter)
vc1_apply_p_loop_filter(v);
if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
- ff_er_add_slice(s, 0, mby_start, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
+ ff_er_add_slice(s, 0, s->start_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
return;
}
@@ -2974,7 +3152,7 @@ static void vc1_decode_p_blocks(VC1Context *v, int mby_start, int mby_end)
memmove(v->ttblk_base, v->ttblk, sizeof(v->ttblk_base[0])*s->mb_stride);
memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0])*s->mb_stride);
memmove(v->luma_mv_base, v->luma_mv, sizeof(v->luma_mv_base[0])*s->mb_stride);
- if (s->mb_y != mby_start) ff_draw_horiz_band(s, (s->mb_y-1) * 16, 16);
+ if (s->mb_y != s->start_mb_y) ff_draw_horiz_band(s, (s->mb_y-1) * 16, 16);
s->first_slice_line = 0;
}
if (apply_loop_filter) {
@@ -2985,12 +3163,12 @@ static void vc1_decode_p_blocks(VC1Context *v, int mby_start, int mby_end)
vc1_apply_p_loop_filter(v);
}
}
- if (mby_end >= mby_start)
- ff_draw_horiz_band(s, (mby_end-1) * 16, 16);
- ff_er_add_slice(s, 0, mby_start, s->mb_width - 1, mby_end - 1, (AC_END|DC_END|MV_END));
+ if (s->end_mb_y >= s->start_mb_y)
+ ff_draw_horiz_band(s, (s->end_mb_y-1) * 16, 16);
+ ff_er_add_slice(s, 0, s->start_mb_y, s->mb_width - 1, s->end_mb_y - 1, (AC_END|DC_END|MV_END));
}
-static void vc1_decode_b_blocks(VC1Context *v, int mby_start, int mby_end)
+static void vc1_decode_b_blocks(VC1Context *v)
{
MpegEncContext *s = &v->s;
@@ -3020,7 +3198,7 @@ static void vc1_decode_b_blocks(VC1Context *v, int mby_start, int mby_end)
}
s->first_slice_line = 1;
- for(s->mb_y = mby_start; s->mb_y < mby_end; s->mb_y++) {
+ for(s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
s->mb_x = 0;
ff_init_block_index(s);
for(; s->mb_x < s->mb_width; s->mb_x++) {
@@ -3028,7 +3206,7 @@ static void vc1_decode_b_blocks(VC1Context *v, int mby_start, int mby_end)
vc1_decode_b_mb(v);
if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
- ff_er_add_slice(s, 0, mby_start, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
+ ff_er_add_slice(s, 0, s->start_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
return;
}
@@ -3042,7 +3220,7 @@ static void vc1_decode_b_blocks(VC1Context *v, int mby_start, int mby_end)
}
if (v->s.loop_filter)
ff_draw_horiz_band(s, (s->mb_height-1)*16, 16);
- ff_er_add_slice(s, 0, mby_start, s->mb_width - 1, mby_end - 1, (AC_END|DC_END|MV_END));
+ ff_er_add_slice(s, 0, s->start_mb_y, s->mb_width - 1, s->end_mb_y - 1, (AC_END|DC_END|MV_END));
}
static void vc1_decode_skip_blocks(VC1Context *v)
@@ -3064,17 +3242,21 @@ static void vc1_decode_skip_blocks(VC1Context *v)
s->pict_type = AV_PICTURE_TYPE_P;
}
-static void vc1_decode_blocks(VC1Context *v, int mby_start, int mby_end)
+static void vc1_decode_blocks(VC1Context *v)
{
v->s.esc3_level_length = 0;
if(v->x8_type){
ff_intrax8_decode_picture(&v->x8, 2*v->pq+v->halfpq, v->pq*(!v->pquantizer) );
}else{
+ v->cur_blk_idx = 0;
+ v->left_blk_idx = -1;
+ v->topleft_blk_idx = 1;
+ v->top_blk_idx = 2;
switch(v->s.pict_type) {
case AV_PICTURE_TYPE_I:
if(v->profile == PROFILE_ADVANCED)
- vc1_decode_i_blocks_adv(v, mby_start, mby_end);
+ vc1_decode_i_blocks_adv(v);
else
vc1_decode_i_blocks(v);
break;
@@ -3082,16 +3264,16 @@ static void vc1_decode_blocks(VC1Context *v, int mby_start, int mby_end)
if(v->p_frame_skipped)
vc1_decode_skip_blocks(v);
else
- vc1_decode_p_blocks(v, mby_start, mby_end);
+ vc1_decode_p_blocks(v);
break;
case AV_PICTURE_TYPE_B:
if(v->bi_type){
if(v->profile == PROFILE_ADVANCED)
- vc1_decode_i_blocks_adv(v, mby_start, mby_end);
+ vc1_decode_i_blocks_adv(v);
else
vc1_decode_i_blocks(v);
}else
- vc1_decode_b_blocks(v, mby_start, mby_end);
+ vc1_decode_b_blocks(v);
break;
}
}
@@ -3341,6 +3523,8 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
v->acpred_plane = av_malloc(s->mb_stride * s->mb_height);
v->over_flags_plane = av_malloc(s->mb_stride * s->mb_height);
+ v->n_allocated_blks = s->mb_width + 2;
+ v->block = av_malloc(sizeof(*v->block) * v->n_allocated_blks);
v->cbp_base = av_malloc(sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
v->cbp = v->cbp_base + s->mb_stride;
v->ttblk_base = av_malloc(sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
@@ -3556,8 +3740,9 @@ static int vc1_decode_frame(AVCodecContext *avctx,
for (i = 0; i <= n_slices; i++) {
if (i && get_bits1(&s->gb))
vc1_parse_frame_header_adv(v, &s->gb);
- vc1_decode_blocks(v, i == 0 ? 0 : FFMAX(0, slices[i-1].mby_start),
- i == n_slices ? s->mb_height : FFMIN(s->mb_height, slices[i].mby_start));
+ s->start_mb_y = (i == 0) ? 0 : FFMAX(0, slices[i-1].mby_start);
+ s->end_mb_y = (i == n_slices) ? s->mb_height : FFMIN(s->mb_height, slices[i].mby_start);
+ vc1_decode_blocks(v);
if (i != n_slices) s->gb = slices[i].gb;
}
//av_log(s->avctx, AV_LOG_INFO, "Consumed %i/%i bits\n", get_bits_count(&s->gb), s->gb.size_in_bits);
@@ -3614,6 +3799,7 @@ static av_cold int vc1_decode_end(AVCodecContext *avctx)
av_freep(&v->acpred_plane);
av_freep(&v->over_flags_plane);
av_freep(&v->mb_type_base);
+ av_freep(&v->block);
av_freep(&v->cbp_base);
av_freep(&v->ttblk_base);
av_freep(&v->is_intra_base); // FIXME use v->mb_type[]
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index dbe2120829..2eaa47a05b 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -78,6 +78,58 @@ static void vc1_h_overlap_c(uint8_t* src, int stride)
}
}
+static void vc1_v_s_overlap_c(DCTELEM *top, DCTELEM *bottom) /* in-place overlap filter on DCTELEM data: rewrites elements 48 and 56 of top and 0 and 8 of bottom at 8 consecutive offsets */
+{
+ int i; /* iteration counter, 0..7 */
+ int a, b, c, d; /* the four taps: a, b are read from top; c, d are read from bottom */
+ int d1, d2; /* correction terms computed from the taps */
+ int rnd1 = 4, rnd2 = 3; /* rounding offsets added before each >> 3 */
+ for(i = 0; i < 8; i++) {
+ a = top[48]; /* presumably row 6 of an 8x8 block stored 8 per row -- TODO confirm layout against the caller */
+ b = top[56]; /* 8 elements after a; presumably the last row of top */
+ c = bottom[0]; /* first element of bottom at the current offset */
+ d = bottom[8]; /* 8 elements after c; presumably the second row of bottom */
+ d1 = a - d; /* difference between the first and the last tap */
+ d2 = a - d + b - c; /* d1 plus the difference between the two middle taps */
+
+ top[48] = ((a << 3) - d1 + rnd1) >> 3; /* (8*a - d1 + rnd1) >> 3; NOTE(review): a << 3 is undefined in ISO C if a is negative, a * 8 would avoid that -- confirm DCTELEM range */
+ top[56] = ((b << 3) - d2 + rnd2) >> 3; /* (8*b - d2 + rnd2) >> 3 */
+ bottom[0] = ((c << 3) + d2 + rnd1) >> 3; /* (8*c + d2 + rnd1) >> 3; same magnitude as for b, opposite sign */
+ bottom[8] = ((d << 3) + d1 + rnd2) >> 3; /* (8*d + d1 + rnd2) >> 3; same magnitude as for a, opposite sign */
+
+ bottom++; /* advance both pointers by one element for the next iteration */
+ top++;
+ rnd2 = 7 - rnd2; /* toggles between 3 and 4 */
+ rnd1 = 7 - rnd1; /* toggles between 4 and 3, so rnd1 + rnd2 stays 7 */
+ }
+}
+
+static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right) /* in-place overlap filter on DCTELEM data: rewrites elements 6 and 7 of left and 0 and 1 of right, repeated 8 times with a step of 8 elements */
+{
+ int i; /* iteration counter, 0..7 */
+ int a, b, c, d; /* the four taps: a, b are read from left; c, d are read from right */
+ int d1, d2; /* correction terms computed from the taps */
+ int rnd1 = 4, rnd2 = 3; /* rounding offsets added before each >> 3 */
+ for(i = 0; i < 8; i++) {
+ a = left[6]; /* presumably column 6 of the current row of an 8x8 block stored 8 per row -- TODO confirm layout against the caller */
+ b = left[7]; /* element right after a; presumably the last column of left */
+ c = right[0]; /* first element of the current row in right */
+ d = right[1]; /* element right after c */
+ d1 = a - d; /* difference between the first and the last tap */
+ d2 = a - d + b - c; /* d1 plus the difference between the two middle taps */
+
+ left[6] = ((a << 3) - d1 + rnd1) >> 3; /* (8*a - d1 + rnd1) >> 3; NOTE(review): a << 3 is undefined in ISO C if a is negative, a * 8 would avoid that -- confirm DCTELEM range */
+ left[7] = ((b << 3) - d2 + rnd2) >> 3; /* (8*b - d2 + rnd2) >> 3 */
+ right[0] = ((c << 3) + d2 + rnd1) >> 3; /* (8*c + d2 + rnd1) >> 3; same magnitude as for b, opposite sign */
+ right[1] = ((d << 3) + d1 + rnd2) >> 3; /* (8*d + d1 + rnd2) >> 3; same magnitude as for a, opposite sign */
+
+ right += 8; /* advance both pointers by 8 elements for the next iteration */
+ left += 8;
+ rnd2 = 7 - rnd2; /* toggles between 3 and 4 */
+ rnd1 = 7 - rnd1; /* toggles between 4 and 3, so rnd1 + rnd2 stays 7 */
+ }
+}
+
/**
* VC-1 in-loop deblocking filter for one line
* @param src source block type
@@ -199,7 +251,7 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
}
}
-static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl, int sub)
+static void vc1_inv_trans_8x8_c(DCTELEM block[64])
{
int i;
register int t1,t2,t3,t4,t5,t6,t7,t8;
@@ -254,50 +306,20 @@ static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl, int
t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
- dst[ 0] = (((t5 + t1 ) >> 7) - sub) << shl;
- dst[ 8] = (((t6 + t2 ) >> 7) - sub) << shl;
- dst[16] = (((t7 + t3 ) >> 7) - sub) << shl;
- dst[24] = (((t8 + t4 ) >> 7) - sub) << shl;
- dst[32] = (((t8 - t4 + 1) >> 7) - sub) << shl;
- dst[40] = (((t7 - t3 + 1) >> 7) - sub) << shl;
- dst[48] = (((t6 - t2 + 1) >> 7) - sub) << shl;
- dst[56] = (((t5 - t1 + 1) >> 7) - sub) << shl;
+ dst[ 0] = (t5 + t1) >> 7;
+ dst[ 8] = (t6 + t2) >> 7;
+ dst[16] = (t7 + t3) >> 7;
+ dst[24] = (t8 + t4) >> 7;
+ dst[32] = (t8 - t4 + 1) >> 7;
+ dst[40] = (t7 - t3 + 1) >> 7;
+ dst[48] = (t6 - t2 + 1) >> 7;
+ dst[56] = (t5 - t1 + 1) >> 7;
src++;
dst++;
}
}
-static void vc1_inv_trans_8x8_add_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
- vc1_inv_trans_8x8_c(block, 0, 0);
- ff_add_pixels_clamped_c(block, dest, linesize);
-}
-
-static void vc1_inv_trans_8x8_put_signed_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
- vc1_inv_trans_8x8_c(block, 0, 0);
- ff_put_signed_pixels_clamped_c(block, dest, linesize);
-}
-
-static void vc1_inv_trans_8x8_put_signed_rangered_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
- vc1_inv_trans_8x8_c(block, 1, 0);
- ff_put_signed_pixels_clamped_c(block, dest, linesize);
-}
-
-static void vc1_inv_trans_8x8_put_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
- vc1_inv_trans_8x8_c(block, 0, 0);
- ff_put_pixels_clamped_c(block, dest, linesize);
-}
-
-static void vc1_inv_trans_8x8_put_rangered_c(uint8_t *dest, int linesize, DCTELEM *block)
-{
- vc1_inv_trans_8x8_c(block, 1, 64);
- ff_put_pixels_clamped_c(block, dest, linesize);
-}
-
/** Do inverse transform on 8x4 part of block
*/
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
@@ -692,11 +714,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
}
av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
- dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_c;
- dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_c;
- dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_c;
- dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_c;
- dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_c;
+ dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
@@ -706,6 +724,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
dsp->vc1_h_overlap = vc1_h_overlap_c;
dsp->vc1_v_overlap = vc1_v_overlap_c;
+ dsp->vc1_h_s_overlap = vc1_h_s_overlap_c;
+ dsp->vc1_v_s_overlap = vc1_v_s_overlap_c;
dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c;
dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c;
dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c;
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index db9d892a23..93a9ea3858 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -30,13 +30,9 @@
#include "dsputil.h"
-typedef void (*vc1_idct_func)(uint8_t *dest, int line_size, DCTELEM *block);
-
typedef struct VC1DSPContext {
/* vc1 functions */
- vc1_idct_func vc1_inv_trans_8x8_add;
- vc1_idct_func vc1_inv_trans_8x8_put_signed[2];
- vc1_idct_func vc1_inv_trans_8x8_put[2];
+ void (*vc1_inv_trans_8x8)(DCTELEM *b);
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
@@ -44,8 +40,10 @@ typedef struct VC1DSPContext {
void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
- void (*vc1_v_overlap)(uint8_t* src, int stride);
- void (*vc1_h_overlap)(uint8_t* src, int stride);
+ void (*vc1_v_overlap)(uint8_t *src, int stride);
+ void (*vc1_h_overlap)(uint8_t *src, int stride);
+ void (*vc1_v_s_overlap)(DCTELEM *top, DCTELEM *bottom);
+ void (*vc1_h_s_overlap)(DCTELEM *left, DCTELEM *right);
void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);