author     Michael Niedermayer <michaelni@gmx.at>  2011-12-14 23:58:10 +0100
committer  Michael Niedermayer <michaelni@gmx.at>  2011-12-14 23:58:10 +0100
commit     e462257242fc037c99206457d1316e1ff9e5306f (patch)
tree       045910517a8b587f7a016b1c46403e1d1021f4f2 /libavcodec
parent     a1be5bc79d7ac4c7c7ed79c4d72b4f1945ecb55c (diff)
parent     115a57302a7d6661426304bec3a5bc72d0edf4b0 (diff)
download   ffmpeg-e462257242fc037c99206457d1316e1ff9e5306f.tar.gz
Merge remote-tracking branch 'qatar/master'

* qatar/master: (23 commits)
  applehttp: Properly clean up if unable to probe a segment
  applehttp: Avoid reading uninitialized memory
  fate: Replace misleading "aac" in the name of an ADTS test with "adts".
  fate: Drop pointless "-an" from pictor test command.
  fate: split off image codec FATE tests into their own file
  fate: split off WMA codec FATE tests into their own file
  fate: split off lossless video and audio FATE tests into their own files
  fate: split off qtrle codec FATE tests into their own file
  fate: split off Ut Video codec FATE tests into their own file
  fate: split off screen codec FATE tests into their own file
  fate: split off Real Inc. codec FATE tests into their own file
  fate: split off AC-3 codec FATE tests into their own file
  mpegvideo: remove abort() in ff_find_unused_picture()
  rv40: NEON optimised loop filter strength selection
  rv40: rearrange loop filter functions
  configure: cosmetics: sort some lists where appropriate
  swscale_mmx: drop no longer required parameters from VSCALEX macros
  swscale: Mark yuv2planeX_8_mmx as MMX2; it contains MMX2 instructions.
  build: conditionally compile x86 H.264 chroma optimizations
  v410 encoder and decoder
  ...

Conflicts:
	Changelog
	configure
	doc/developer.texi
	doc/general.texi
	libavcodec/arm/asm.S
	libavcodec/avcodec.h
	libavcodec/v410dec.c
	libavcodec/v410enc.c
	libavcodec/version.h
	libavcodec/x86/Makefile
	libavcodec/x86/dsputil_mmx.c
	libswscale/x86/swscale_mmx.c
	tests/Makefile
	tests/fate2.mak

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--  libavcodec/arm/asm.S                   8
-rw-r--r--  libavcodec/arm/rv40dsp_init_neon.c    10
-rw-r--r--  libavcodec/arm/rv40dsp_neon.S         86
-rw-r--r--  libavcodec/avcodec.h                  10
-rw-r--r--  libavcodec/h261dec.c                   2
-rw-r--r--  libavcodec/h263dec.c                   2
-rw-r--r--  libavcodec/mpegvideo.c                22
-rw-r--r--  libavcodec/mpegvideo_enc.c             6
-rw-r--r--  libavcodec/rv34dsp.h                  21
-rw-r--r--  libavcodec/rv40.c                     69
-rw-r--r--  libavcodec/rv40dsp.c                 296
-rw-r--r--  libavcodec/vc1dec.c                    2
-rw-r--r--  libavcodec/version.h                   2
-rw-r--r--  libavcodec/x86/Makefile                5
-rw-r--r--  libavcodec/x86/dsputil_mmx.c          22
15 files changed, 377 insertions, 186 deletions
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
index d711cb8f11..2daac59242 100644
--- a/libavcodec/arm/asm.S
+++ b/libavcodec/arm/asm.S
@@ -113,10 +113,10 @@ T add \rn, \rn, \rm
T ldr \rt, [\rn]
.endm
-.macro ldr_dpren rt, rn, rm:vararg
-A ldr \rt, [\rn, -\rm]
-T sub \rt, \rn, \rm
-T ldr \rt, [\rt]
+.macro ldr_dpre rt, rn, rm:vararg
+A ldr \rt, [\rn, -\rm]!
+T sub \rn, \rn, \rm
+T ldr \rt, [\rn]
.endm
.macro ldr_post rt, rn, rm:vararg
diff --git a/libavcodec/arm/rv40dsp_init_neon.c b/libavcodec/arm/rv40dsp_init_neon.c
index 36d75e6fd8..59dddb6605 100644
--- a/libavcodec/arm/rv40dsp_init_neon.c
+++ b/libavcodec/arm/rv40dsp_init_neon.c
@@ -54,6 +54,13 @@ void ff_avg_rv40_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_rv40_weight_func_16_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
void ff_rv40_weight_func_8_neon(uint8_t *, uint8_t *, uint8_t *, int, int, int);
+int ff_rv40_h_loop_filter_strength_neon(uint8_t *src, int stride,
+ int beta, int beta2, int edge,
+ int *p1, int *q1);
+int ff_rv40_v_loop_filter_strength_neon(uint8_t *src, int stride,
+ int beta, int beta2, int edge,
+ int *p1, int *q1);
+
void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
c->put_pixels_tab[0][ 1] = ff_put_rv40_qpel16_mc10_neon;
@@ -116,4 +123,7 @@ void ff_rv40dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
c->rv40_weight_pixels_tab[0] = ff_rv40_weight_func_16_neon;
c->rv40_weight_pixels_tab[1] = ff_rv40_weight_func_8_neon;
+
+ c->rv40_loop_filter_strength[0] = ff_rv40_h_loop_filter_strength_neon;
+ c->rv40_loop_filter_strength[1] = ff_rv40_v_loop_filter_strength_neon;
}
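
The two new NEON entry points implement the per-edge filter-strength decision added in this merge. For reference while reading the assembly below, the scalar logic they mirror (rv40_loop_filter_strength() in rv40dsp.c further down in this diff) condenses to the following single-pass sketch; FFABS comes from libavutil/common.h and the helper name here is illustrative only, not part of the patch.

    #include <stdint.h>
    #include "libavutil/common.h"   /* FFABS() */

    /* Condensed, single-pass rewrite of rv40_loop_filter_strength() from
     * rv40dsp.c (illustrative helper name).  Returns nonzero when strong
     * filtering should be used; *p1 and *q1 report whether the second
     * sample on each side of the edge may be touched by the weak filter. */
    static int loop_filter_strength_ref(uint8_t *src, int step, int stride,
                                        int beta, int beta2, int edge,
                                        int *p1, int *q1)
    {
        int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
        int i;

        for (i = 0; i < 4; i++, src += stride) {
            sum_p1p0 += src[-2*step] - src[-1*step];
            sum_q1q0 += src[ 1*step] - src[ 0*step];
            sum_p1p2 += src[-2*step] - src[-3*step];
            sum_q1q2 += src[ 1*step] - src[ 2*step];
        }

        *p1 = FFABS(sum_p1p0) < (beta << 2);
        *q1 = FFABS(sum_q1q0) < (beta << 2);

        /* strong filtering is only considered on macroblock edges where at
         * least one side passed the beta test */
        if (!edge || (!*p1 && !*q1))
            return 0;

        return *p1 && FFABS(sum_p1p2) < beta2 &&
               *q1 && FFABS(sum_q1q2) < beta2;
    }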
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index 07ba8428c1..a4313d89f9 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -722,3 +722,89 @@ function ff_rv40_weight_func_8_neon, export=1
bne 1b
bx lr
endfunc
+
+function ff_rv40_h_loop_filter_strength_neon, export=1
+ pkhbt r2, r3, r2, lsl #18
+
+ ldr r3, [r0]
+ ldr_dpre r12, r0, r1
+ teq r3, r12
+ beq 1f
+
+ sub r0, r0, r1, lsl #1
+
+ vld1.32 {d4[]}, [r0,:32], r1 @ -3
+ vld1.32 {d0[]}, [r0,:32], r1 @ -2
+ vld1.32 {d4[1]}, [r0,:32], r1 @ -1
+ vld1.32 {d5[]}, [r0,:32], r1 @ 0
+ vld1.32 {d1[]}, [r0,:32], r1 @ 1
+ vld1.32 {d5[0]}, [r0,:32], r1 @ 2
+
+ vpaddl.u8 q8, q0 @ -2, -2, -2, -2, 1, 1, 1, 1
+ vpaddl.u8 q9, q2 @ -3, -3, -1, -1, 2, 2, 0, 0
+ vdup.32 d30, r2 @ beta2, beta << 2
+ vpadd.u16 d16, d16, d17 @ -2, -2, 1, 1
+ vpadd.u16 d18, d18, d19 @ -3, -1, 2, 0
+ vabd.u16 d16, d18, d16
+ vclt.u16 d16, d16, d30
+
+ ldrd r2, r3, [sp, #4]
+ vmovl.u16 q12, d16
+ vtrn.16 d16, d17
+ vshr.u32 q12, q12, #15
+ ldr r0, [sp]
+ vst1.32 {d24[1]}, [r2,:32]
+ vst1.32 {d25[1]}, [r3,:32]
+
+ cmp r0, #0
+ it eq
+ bxeq lr
+
+ vand d18, d16, d17
+ vtrn.32 d18, d19
+ vand d18, d18, d19
+ vmov.u16 r0, d18[0]
+ bx lr
+1:
+ ldrd r2, r3, [sp, #4]
+ mov r0, #0
+ str r0, [r2]
+ str r0, [r3]
+ bx lr
+endfunc
+
+function ff_rv40_v_loop_filter_strength_neon, export=1
+ sub r0, r0, #3
+ pkhbt r2, r3, r2, lsl #18
+
+ vld1.8 {d0}, [r0], r1
+ vld1.8 {d1}, [r0], r1
+ vld1.8 {d2}, [r0], r1
+ vld1.8 {d3}, [r0], r1
+
+ vaddl.u8 q0, d0, d1
+ vaddl.u8 q1, d2, d3
+ vdup.32 q15, r2
+ vadd.u16 q0, q0, q1 @ -3, -2, -1, 0, 1, 2
+ vext.16 q1, q0, q0, #1 @ -2, -1, 0, 1, 2
+ vabd.u16 q0, q1, q0
+ vclt.u16 q0, q0, q15
+
+ ldrd r2, r3, [sp, #4]
+ vmovl.u16 q1, d0
+ vext.16 d1, d0, d1, #3
+ vshr.u32 q1, q1, #15
+ ldr r0, [sp]
+ vst1.32 {d2[1]}, [r2,:32]
+ vst1.32 {d3[1]}, [r3,:32]
+
+ cmp r0, #0
+ it eq
+ bxeq lr
+
+ vand d0, d0, d1
+ vtrn.16 d0, d1
+ vand d0, d0, d1
+ vmov.u16 r0, d0[0]
+ bx lr
+endfunc
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 4290f8b4be..5a0e0ed380 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -4629,16 +4629,8 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
/**
-<<<<<<< HEAD
- * Logs a generic warning message about a missing feature. This function is
- * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
-||||||| merged common ancestors
- * Logs a generic warning message about a missing feature. This function is
- * intended to be used internally by Libav (libavcodec, libavformat, etc.)
-=======
* Log a generic warning message about a missing feature. This function is
- * intended to be used internally by Libav (libavcodec, libavformat, etc.)
->>>>>>> qatar/master
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
* only, and would normally not be used by applications.
* @param[in] avc a pointer to an arbitrary struct of which the first field is
* a pointer to an AVClass struct
diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c
index 9b34690e17..ff3d05c514 100644
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -572,6 +572,8 @@ retry:
//we need to set current_picture_ptr before reading the header, otherwise we cannot store anyting im there
if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
int i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
s->current_picture_ptr= &s->picture[i];
}
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 9bad8e6c74..0d741d80c2 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -410,6 +410,8 @@ retry:
* otherwise we cannot store anyting in there */
if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
int i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
s->current_picture_ptr= &s->picture[i];
}
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 2b792642c5..8924046dc4 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1095,21 +1095,7 @@ int ff_find_unused_picture(MpegEncContext *s, int shared)
}
}
- av_log(s->avctx, AV_LOG_FATAL,
- "Internal error, picture buffer overflow\n");
- /* We could return -1, but the codec would crash trying to draw into a
- * non-existing frame anyway. This is safer than waiting for a random crash.
- * Also the return of this is never useful, an encoder must only allocate
- * as much as allowed in the specification. This has no relationship to how
- * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
- * enough for such valid streams).
- * Plus, a decoder has to check stream validity and remove frames if too
- * many reference frames are around. Waiting for "OOM" is not correct at
- * all. Similarly, missing reference frames have to be replaced by
- * interpolated/MC frames, anything else is a bug in the codec ...
- */
- abort();
- return -1;
+ return AVERROR_INVALIDDATA;
}
static void update_noise_reduction(MpegEncContext *s){
@@ -1167,6 +1153,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
else{
i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
pic= &s->picture[i];
}
@@ -1222,6 +1210,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
/* Allocate a dummy frame */
i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
s->last_picture_ptr= &s->picture[i];
s->last_picture_ptr->f.key_frame = 0;
if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
@@ -1238,6 +1228,8 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
if ((s->next_picture_ptr == NULL || s->next_picture_ptr->f.data[0] == NULL) && s->pict_type == AV_PICTURE_TYPE_B) {
/* Allocate a dummy frame */
i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
s->next_picture_ptr= &s->picture[i];
s->next_picture_ptr->f.key_frame = 0;
if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
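
With the abort() gone, ff_find_unused_picture() reports picture-buffer overflow through a negative AVERROR code (AVERROR_INVALIDDATA), so every call site in this merge gains the same two-line check and passes the error up. A minimal sketch of that caller-side pattern, modelled on the h261dec.c/h263dec.c hunks above (the wrapper function name is hypothetical):

    #include "mpegvideo.h"   /* MpegEncContext, ff_find_unused_picture() */

    /* Hypothetical caller showing the error-propagation pattern this merge
     * introduces: a failed picture lookup is returned to the caller instead
     * of crashing inside libavcodec. */
    static int example_pick_current_picture(MpegEncContext *s)
    {
        if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
            int i = ff_find_unused_picture(s, 0);
            if (i < 0)
                return i;               /* AVERROR_INVALIDDATA on overflow */
            s->current_picture_ptr = &s->picture[i];
        }
        return 0;
    }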
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 40dea427d4..83c4932d5b 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -864,6 +864,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
if(direct){
i= ff_find_unused_picture(s, 1);
+ if (i < 0)
+ return i;
pic= (AVFrame*)&s->picture[i];
pic->reference= 3;
@@ -877,6 +879,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
}
}else{
i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
pic= (AVFrame*)&s->picture[i];
pic->reference= 3;
@@ -1210,6 +1214,8 @@ no_output_pic:
// input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
int i= ff_find_unused_picture(s, 0);
+ if (i < 0)
+ return i;
Picture *pic= &s->picture[i];
pic->f.reference = s->reordered_input_picture[0]->f.reference;
diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
index cf6e14d305..01352ea793 100644
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@@ -36,10 +36,18 @@ typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
typedef void (*rv34_inv_transform_func)(DCTELEM *block);
-typedef void (*rv40_loop_filter_func)(uint8_t *src, int stride, int dmode,
- int lim_q1, int lim_p1, int alpha,
- int beta, int beta2, int chroma,
- int edge);
+typedef void (*rv40_weak_loop_filter_func)(uint8_t *src, int stride,
+ int filter_p1, int filter_q1,
+ int alpha, int beta,
+ int lims, int lim_q1, int lim_p1);
+
+typedef void (*rv40_strong_loop_filter_func)(uint8_t *src, int stride,
+ int alpha, int lims,
+ int dmode, int chroma);
+
+typedef int (*rv40_loop_filter_strength_func)(uint8_t *src, int stride,
+ int beta, int beta2, int edge,
+ int *p1, int *q1);
typedef struct RV34DSPContext {
qpel_mc_func put_pixels_tab[4][16];
@@ -49,8 +57,9 @@ typedef struct RV34DSPContext {
rv40_weight_func rv40_weight_pixels_tab[2];
rv34_inv_transform_func rv34_inv_transform_tab[2];
void (*rv34_dequant4x4)(DCTELEM *block, int Qdc, int Q);
- rv40_loop_filter_func rv40_h_loop_filter;
- rv40_loop_filter_func rv40_v_loop_filter;
+ rv40_weak_loop_filter_func rv40_weak_loop_filter[2];
+ rv40_strong_loop_filter_func rv40_strong_loop_filter[2];
+ rv40_loop_filter_strength_func rv40_loop_filter_strength[2];
} RV34DSPContext;
void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index 8dd78f1fb8..fe104c1908 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -297,6 +297,34 @@ enum RV40BlockPos{
static const int neighbour_offs_x[4] = { 0, 0, -1, 0 };
static const int neighbour_offs_y[4] = { 0, -1, 0, 1 };
+static void rv40_adaptive_loop_filter(RV34DSPContext *rdsp,
+ uint8_t *src, int stride, int dmode,
+ int lim_q1, int lim_p1,
+ int alpha, int beta, int beta2,
+ int chroma, int edge, int dir)
+{
+ int filter_p1, filter_q1;
+ int strong;
+ int lims;
+
+ strong = rdsp->rv40_loop_filter_strength[dir](src, stride, beta, beta2,
+ edge, &filter_p1, &filter_q1);
+
+ lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
+
+ if (strong) {
+ rdsp->rv40_strong_loop_filter[dir](src, stride, alpha,
+ lims, dmode, chroma);
+ } else if (filter_p1 & filter_q1) {
+ rdsp->rv40_weak_loop_filter[dir](src, stride, 1, 1, alpha, beta,
+ lims, lim_q1, lim_p1);
+ } else if (filter_p1 | filter_q1) {
+ rdsp->rv40_weak_loop_filter[dir](src, stride, filter_p1, filter_q1,
+ alpha, beta, lims >> 1, lim_q1 >> 1,
+ lim_p1 >> 1);
+ }
+}
+
/**
* RV40 loop filtering function
*/
@@ -433,10 +461,11 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
// if bottom block is coded then we can filter its top edge
// (or bottom edge of this block, which is the same)
if(y_h_deblock & (MASK_BOTTOM << ij)){
- r->rdsp.rv40_h_loop_filter(Y+4*s->linesize, s->linesize, dither,
- y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0,
- clip_cur,
- alpha, beta, betaY, 0, 0);
+ rv40_adaptive_loop_filter(&r->rdsp, Y+4*s->linesize,
+ s->linesize, dither,
+ y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0,
+ clip_cur, alpha, beta, betaY,
+ 0, 0, 0);
}
// filter left block edge in ordinary mode (with low filtering strength)
if(y_v_deblock & (MASK_CUR << ij) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
@@ -444,25 +473,25 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
else
clip_left = y_to_deblock & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0;
- r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither,
- clip_cur,
- clip_left,
- alpha, beta, betaY, 0, 0);
+ rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
+ clip_cur,
+ clip_left,
+ alpha, beta, betaY, 0, 0, 1);
}
// filter top edge of the current macroblock when filtering strength is high
if(!j && y_h_deblock & (MASK_CUR << i) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
- r->rdsp.rv40_h_loop_filter(Y, s->linesize, dither,
+ rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
clip_cur,
mvmasks[POS_TOP] & (MASK_TOP << i) ? clip[POS_TOP] : 0,
- alpha, beta, betaY, 0, 1);
+ alpha, beta, betaY, 0, 1, 0);
}
// filter left block edge in edge mode (with high filtering strength)
if(y_v_deblock & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
- r->rdsp.rv40_v_loop_filter(Y, s->linesize, dither,
+ rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
clip_cur,
clip_left,
- alpha, beta, betaY, 0, 1);
+ alpha, beta, betaY, 0, 1, 1);
}
}
}
@@ -474,34 +503,34 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
int clip_cur = c_to_deblock[k] & (MASK_CUR << ij) ? clip[POS_CUR] : 0;
if(c_h_deblock[k] & (MASK_CUR << (ij+2))){
int clip_bot = c_to_deblock[k] & (MASK_CUR << (ij+2)) ? clip[POS_CUR] : 0;
- r->rdsp.rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, i*8,
+ rv40_adaptive_loop_filter(&r->rdsp, C+4*s->uvlinesize, s->uvlinesize, i*8,
clip_bot,
clip_cur,
- alpha, beta, betaC, 1, 0);
+ alpha, beta, betaC, 1, 0, 0);
}
if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
if(!i)
clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
else
clip_left = c_to_deblock[k] & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0;
- r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8,
+ rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8,
clip_cur,
clip_left,
- alpha, beta, betaC, 1, 0);
+ alpha, beta, betaC, 1, 0, 1);
}
if(!j && c_h_deblock[k] & (MASK_CUR << ij) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
int clip_top = uvcbp[POS_TOP][k] & (MASK_CUR << (ij+2)) ? clip[POS_TOP] : 0;
- r->rdsp.rv40_h_loop_filter(C, s->uvlinesize, i*8,
+ rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, i*8,
clip_cur,
clip_top,
- alpha, beta, betaC, 1, 1);
+ alpha, beta, betaC, 1, 1, 0);
}
if(c_v_deblock[k] & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
- r->rdsp.rv40_v_loop_filter(C, s->uvlinesize, j*8,
+ rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8,
clip_cur,
clip_left,
- alpha, beta, betaC, 1, 1);
+ alpha, beta, betaC, 1, 1, 1);
}
}
}
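
The new dir argument of rv40_adaptive_loop_filter() indexes the function-pointer tables added to RV34DSPContext; the mapping only becomes visible by cross-referencing ff_rv40dsp_init() in rv40dsp.c below, so it is summarised here as a reference comment (not code from the patch):

    /* Index convention of the new RV34DSPContext loop-filter tables, as set
     * up by ff_rv40dsp_init() and ff_rv40dsp_init_neon() in this patch:
     *   [0] -> horizontal-edge variants (rv40_h_*): samples across the edge
     *          are linesize apart, the filter walks 4 positions along the
     *          edge with element stride 1
     *   [1] -> vertical-edge variants (rv40_v_*): samples across the edge
     *          are 1 apart, the filter walks 4 positions with the linesize
     *          as stride
     * rv40_adaptive_loop_filter() passes dir = 0 where rv40_h_loop_filter()
     * used to be called and dir = 1 where rv40_v_loop_filter() was. */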
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
index 1fc247e88b..913ced046d 100644
--- a/libavcodec/rv40dsp.c
+++ b/libavcodec/rv40dsp.c
@@ -314,142 +314,194 @@ static const uint8_t rv40_dither_r[16] = {
/**
* weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
*/
-static inline void rv40_weak_loop_filter(uint8_t *src, const int step,
- const int filter_p1, const int filter_q1,
- const int alpha, const int beta,
- const int lim_p0q0,
- const int lim_q1, const int lim_p1,
- const int diff_p1p0, const int diff_q1q0,
- const int diff_p1p2, const int diff_q1q2)
+static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
+ const int step,
+ const int stride,
+ const int filter_p1,
+ const int filter_q1,
+ const int alpha,
+ const int beta,
+ const int lim_p0q0,
+ const int lim_q1,
+ const int lim_p1)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
- int t, u, diff;
-
- t = src[0*step] - src[-1*step];
- if(!t)
- return;
- u = (alpha * FFABS(t)) >> 7;
- if(u > 3 - (filter_p1 && filter_q1))
- return;
-
- t <<= 2;
- if(filter_p1 && filter_q1)
- t += src[-2*step] - src[1*step];
- diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
- src[-1*step] = cm[src[-1*step] + diff];
- src[ 0*step] = cm[src[ 0*step] - diff];
- if(FFABS(diff_p1p2) <= beta && filter_p1){
- t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
- src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
+ int i, t, u, diff;
+
+ for (i = 0; i < 4; i++, src += stride) {
+ int diff_p1p0 = src[-2*step] - src[-1*step];
+ int diff_q1q0 = src[ 1*step] - src[ 0*step];
+ int diff_p1p2 = src[-2*step] - src[-3*step];
+ int diff_q1q2 = src[ 1*step] - src[ 2*step];
+
+ t = src[0*step] - src[-1*step];
+ if (!t)
+ continue;
+
+ u = (alpha * FFABS(t)) >> 7;
+ if (u > 3 - (filter_p1 && filter_q1))
+ continue;
+
+ t <<= 2;
+ if (filter_p1 && filter_q1)
+ t += src[-2*step] - src[1*step];
+
+ diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
+ src[-1*step] = cm[src[-1*step] + diff];
+ src[ 0*step] = cm[src[ 0*step] - diff];
+
+ if (filter_p1 && FFABS(diff_p1p2) <= beta) {
+ t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
+ src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
+ }
+
+ if (filter_q1 && FFABS(diff_q1q2) <= beta) {
+ t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
+ src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
+ }
}
- if(FFABS(diff_q1q2) <= beta && filter_q1){
- t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
- src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
+}
+
+static void rv40_h_weak_loop_filter(uint8_t *src, const int stride,
+ const int filter_p1, const int filter_q1,
+ const int alpha, const int beta,
+ const int lim_p0q0, const int lim_q1,
+ const int lim_p1)
+{
+ rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
+ alpha, beta, lim_p0q0, lim_q1, lim_p1);
+}
+
+static void rv40_v_weak_loop_filter(uint8_t *src, const int stride,
+ const int filter_p1, const int filter_q1,
+ const int alpha, const int beta,
+ const int lim_p0q0, const int lim_q1,
+ const int lim_p1)
+{
+ rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
+ alpha, beta, lim_p0q0, lim_q1, lim_p1);
+}
+
+static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
+ const int step,
+ const int stride,
+ const int alpha,
+ const int lims,
+ const int dmode,
+ const int chroma)
+{
+ int i;
+
+ for(i = 0; i < 4; i++, src += stride){
+ int sflag, p0, q0, p1, q1;
+ int t = src[0*step] - src[-1*step];
+
+ if (!t)
+ continue;
+
+ sflag = (alpha * FFABS(t)) >> 7;
+ if (sflag > 1)
+ continue;
+
+ p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
+ 26*src[ 0*step] + 25*src[ 1*step] +
+ rv40_dither_l[dmode + i]) >> 7;
+
+ q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
+ 26*src[ 1*step] + 25*src[ 2*step] +
+ rv40_dither_r[dmode + i]) >> 7;
+
+ if (sflag) {
+ p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
+ q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
+ }
+
+ p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
+ 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
+ q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
+ 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
+
+ if (sflag) {
+ p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
+ q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
+ }
+
+ src[-2*step] = p1;
+ src[-1*step] = p0;
+ src[ 0*step] = q0;
+ src[ 1*step] = q1;
+
+ if(!chroma){
+ src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
+ 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
+ src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
+ 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
+ }
}
}
-static av_always_inline void rv40_adaptive_loop_filter(uint8_t *src, const int step,
- const int stride, const int dmode,
- const int lim_q1, const int lim_p1,
- const int alpha,
- const int beta, const int beta2,
- const int chroma, const int edge)
+static void rv40_h_strong_loop_filter(uint8_t *src, const int stride,
+ const int alpha, const int lims,
+ const int dmode, const int chroma)
+{
+ rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
+}
+
+static void rv40_v_strong_loop_filter(uint8_t *src, const int stride,
+ const int alpha, const int lims,
+ const int dmode, const int chroma)
+{
+ rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
+}
+
+static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
+ int step, int stride,
+ int beta, int beta2,
+ int edge,
+ int *p1, int *q1)
{
- int diff_p1p0[4], diff_q1q0[4], diff_p1p2[4], diff_q1q2[4];
int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
+ int strong0 = 0, strong1 = 0;
uint8_t *ptr;
- int flag_strong0 = 1, flag_strong1 = 1;
- int filter_p1, filter_q1;
int i;
- int lims;
- for(i = 0, ptr = src; i < 4; i++, ptr += stride){
- diff_p1p0[i] = ptr[-2*step] - ptr[-1*step];
- diff_q1q0[i] = ptr[ 1*step] - ptr[ 0*step];
- sum_p1p0 += diff_p1p0[i];
- sum_q1q0 += diff_q1q0[i];
- }
- filter_p1 = FFABS(sum_p1p0) < (beta<<2);
- filter_q1 = FFABS(sum_q1q0) < (beta<<2);
- if(!filter_p1 && !filter_q1)
- return;
-
- for(i = 0, ptr = src; i < 4; i++, ptr += stride){
- diff_p1p2[i] = ptr[-2*step] - ptr[-3*step];
- diff_q1q2[i] = ptr[ 1*step] - ptr[ 2*step];
- sum_p1p2 += diff_p1p2[i];
- sum_q1q2 += diff_q1q2[i];
+ for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
+ sum_p1p0 += ptr[-2*step] - ptr[-1*step];
+ sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
}
- if(edge){
- flag_strong0 = filter_p1 && (FFABS(sum_p1p2) < beta2);
- flag_strong1 = filter_q1 && (FFABS(sum_q1q2) < beta2);
- }else{
- flag_strong0 = flag_strong1 = 0;
- }
+ *p1 = FFABS(sum_p1p0) < (beta << 2);
+ *q1 = FFABS(sum_q1q0) < (beta << 2);
- lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
- if(flag_strong0 && flag_strong1){ /* strong filtering */
- for(i = 0; i < 4; i++, src += stride){
- int sflag, p0, q0, p1, q1;
- int t = src[0*step] - src[-1*step];
-
- if(!t) continue;
- sflag = (alpha * FFABS(t)) >> 7;
- if(sflag > 1) continue;
-
- p0 = (25*src[-3*step] + 26*src[-2*step]
- + 26*src[-1*step]
- + 26*src[ 0*step] + 25*src[ 1*step] + rv40_dither_l[dmode + i]) >> 7;
- q0 = (25*src[-2*step] + 26*src[-1*step]
- + 26*src[ 0*step]
- + 26*src[ 1*step] + 25*src[ 2*step] + rv40_dither_r[dmode + i]) >> 7;
- if(sflag){
- p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
- q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
- }
- p1 = (25*src[-4*step] + 26*src[-3*step]
- + 26*src[-2*step]
- + 26*p0 + 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
- q1 = (25*src[-1*step] + 26*q0
- + 26*src[ 1*step]
- + 26*src[ 2*step] + 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
- if(sflag){
- p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
- q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
- }
- src[-2*step] = p1;
- src[-1*step] = p0;
- src[ 0*step] = q0;
- src[ 1*step] = q1;
- if(!chroma){
- src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
- src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
- }
- }
- }else if(filter_p1 && filter_q1){
- for(i = 0; i < 4; i++, src += stride)
- rv40_weak_loop_filter(src, step, 1, 1, alpha, beta, lims, lim_q1, lim_p1,
- diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]);
- }else{
- for(i = 0; i < 4; i++, src += stride)
- rv40_weak_loop_filter(src, step, filter_p1, filter_q1,
- alpha, beta, lims>>1, lim_q1>>1, lim_p1>>1,
- diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]);
+ if(!*p1 && !*q1)
+ return 0;
+
+ if (!edge)
+ return 0;
+
+ for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
+ sum_p1p2 += ptr[-2*step] - ptr[-3*step];
+ sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
}
+
+ strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
+ strong1 = *q1 && (FFABS(sum_q1q2) < beta2);
+
+ return strong0 && strong1;
}
-static void rv40_v_loop_filter(uint8_t *src, int stride, int dmode,
- int lim_q1, int lim_p1,
- int alpha, int beta, int beta2, int chroma, int edge){
- rv40_adaptive_loop_filter(src, 1, stride, dmode, lim_q1, lim_p1,
- alpha, beta, beta2, chroma, edge);
+static int rv40_h_loop_filter_strength(uint8_t *src, int stride,
+ int beta, int beta2, int edge,
+ int *p1, int *q1)
+{
+ return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}
-static void rv40_h_loop_filter(uint8_t *src, int stride, int dmode,
- int lim_q1, int lim_p1,
- int alpha, int beta, int beta2, int chroma, int edge){
- rv40_adaptive_loop_filter(src, stride, 1, dmode, lim_q1, lim_p1,
- alpha, beta, beta2, chroma, edge);
+
+static int rv40_v_loop_filter_strength(uint8_t *src, int stride,
+ int beta, int beta2, int edge,
+ int *p1, int *q1)
+{
+ return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}
av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
@@ -529,8 +581,12 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->rv40_weight_pixels_tab[0] = rv40_weight_func_16;
c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;
- c->rv40_h_loop_filter = rv40_h_loop_filter;
- c->rv40_v_loop_filter = rv40_v_loop_filter;
+ c->rv40_weak_loop_filter[0] = rv40_h_weak_loop_filter;
+ c->rv40_weak_loop_filter[1] = rv40_v_weak_loop_filter;
+ c->rv40_strong_loop_filter[0] = rv40_h_strong_loop_filter;
+ c->rv40_strong_loop_filter[1] = rv40_v_strong_loop_filter;
+ c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
+ c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
if (HAVE_MMX)
ff_rv40dsp_init_x86(c, dsp);
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 4dab137eb7..771574df78 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -5582,6 +5582,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
* otherwise we cannot store anything in there. */
if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
int i = ff_find_unused_picture(s, 0);
+ if (i < 0)
+ goto err;
s->current_picture_ptr = &s->picture[i];
}
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 870d7e0a30..84d4dadfa1 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -21,7 +21,7 @@
#define AVCODEC_VERSION_H
#define LIBAVCODEC_VERSION_MAJOR 53
-#define LIBAVCODEC_VERSION_MINOR 44
+#define LIBAVCODEC_VERSION_MINOR 45
#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 4b85514676..48ff39b0a8 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -11,6 +11,9 @@ YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \
YASM-OBJS-$(CONFIG_DWT) += x86/dwt_yasm.o
+YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
+ x86/h264_chromamc_10bit.o
+
MMX-OBJS-$(CONFIG_H264DSP) += x86/h264dsp_mmx.o
YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
x86/h264_deblock_10bit.o \
@@ -59,8 +62,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
x86/deinterlace.o \
x86/fmtconvert.o \
- x86/h264_chromamc.o \
- x86/h264_chromamc_10bit.o \
x86/h264_qpel_10bit.o \
$(YASM-OBJS-yes)
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index a27c3b53d6..75bcae8110 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2548,7 +2548,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#if HAVE_YASM
- if (!high_bit_depth) {
+ if (!high_bit_depth && CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
}
@@ -2652,13 +2652,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
#if HAVE_YASM
- if (!high_bit_depth) {
+ if (!high_bit_depth && CONFIG_H264CHROMA) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
}
- if (bit_depth == 10) {
+ if (bit_depth == 10 && CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_10_mmxext;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_10_mmxext;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_10_mmxext;
@@ -2728,7 +2728,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
#if HAVE_YASM
- if (!high_bit_depth) {
+ if (!high_bit_depth && CONFIG_H264CHROMA) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
}
@@ -2781,8 +2781,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
- c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
- c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
+ if (CONFIG_H264CHROMA) {
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
+ }
}
#endif
}
@@ -2808,7 +2810,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
}
- if (!high_bit_depth) {
+ if (!high_bit_depth && CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
@@ -2909,8 +2911,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS_10(2, 0, sse2)
H264_QPEL_FUNCS_10(3, 0, sse2)
- c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
- c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
+ if (CONFIG_H264CHROMA) {
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
+ }
}
c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
}
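
The CONFIG_H264CHROMA guards added above rely on FFmpeg's usual pattern of testing a 0/1 configure-generated constant in a plain if() rather than an #if: the guarded code is still parsed and type-checked, and when the component is disabled, dead-code elimination removes the calls so the unbuilt ff_*_h264_chroma_* objects are never referenced at link time. A standalone sketch of the idiom, with hypothetical names standing in for config.h and the chroma init:

    #include <stdio.h>

    /* Stand-in for a value normally generated into config.h by configure. */
    #define CONFIG_EXAMPLE 1

    /* In FFmpeg the guarded function would only be compiled when the
     * component is enabled; a definition is kept here to keep the sketch
     * runnable on its own. */
    static void register_chroma_functions(void)
    {
        puts("chroma function pointers installed");
    }

    static void dsp_init(void)
    {
        if (CONFIG_EXAMPLE)   /* constant-folded; branch dropped when 0 */
            register_chroma_functions();
        else
            puts("component disabled, generic code only");
    }

    int main(void)
    {
        dsp_init();
        return 0;
    }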